From e842ac9ed78236dcf4e782582347929ecf472809 Mon Sep 17 00:00:00 2001 From: million Date: Mon, 30 Sep 2024 19:30:07 +0800 Subject: [PATCH] init --- .gitignore | 2 ++ config.json | 22 +++++++++++++++ engines/crawler_bbc_search.py | 0 engines/crawler_google_search.py | 6 +++++ engines/crawler_webb_site.py | 0 engines/crawler_wikipedia.py | 0 iCloudEngine | 1 + iCloudEngine.git.bat | 2 ++ main.py | 46 ++++++++++++++++++++++++++++++++ submit_test.py | 27 +++++++++++++++++++ utils/logger.py | 42 +++++++++++++++++++++++++++++ 11 files changed, 148 insertions(+) create mode 100644 .gitignore create mode 100644 config.json create mode 100644 engines/crawler_bbc_search.py create mode 100644 engines/crawler_google_search.py create mode 100644 engines/crawler_webb_site.py create mode 100644 engines/crawler_wikipedia.py create mode 160000 iCloudEngine create mode 100644 iCloudEngine.git.bat create mode 100644 main.py create mode 100644 submit_test.py create mode 100644 utils/logger.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8ef114b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/logs +__pycache__ \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..9f23778 --- /dev/null +++ b/config.json @@ -0,0 +1,22 @@ +{ + "abp": { + "api_base": "https://stag-abp-api.iconsz.com", + "tenant": "ics", + "username": "ics", + "password": "1qaz@WSX", + "client_id": "engine_CrawlerEngines", + "client_secret": "7ii7U9AabN2*CrawlerEngines", + "scope": "FX" + }, + "engine": { + + }, + "queue_processor": { + "get_pending_queue_interval_seconds": 2 + }, + "queue_client": { + "check_queue_interval_seconds": 1, + "call_wait_return_default_timeout_seconds": 60 + } + } + \ No newline at end of file diff --git a/engines/crawler_bbc_search.py b/engines/crawler_bbc_search.py new file mode 100644 index 0000000..e69de29 diff --git a/engines/crawler_google_search.py b/engines/crawler_google_search.py new file mode 100644 index 
0000000..62acfce --- /dev/null +++ b/engines/crawler_google_search.py @@ -0,0 +1,6 @@ +class CrawlerGoogleSearch: + def process(inputData): + print("CrawlerGoogleSearch / inputData", inputData) + # TODO 具体的实现逻辑 + + return {"full_name": "xxx", "date_of_birth": "1956-01-01"} diff --git a/engines/crawler_webb_site.py b/engines/crawler_webb_site.py new file mode 100644 index 0000000..e69de29 diff --git a/engines/crawler_wikipedia.py b/engines/crawler_wikipedia.py new file mode 100644 index 0000000..e69de29 diff --git a/iCloudEngine b/iCloudEngine new file mode 160000 index 0000000..646c442 --- /dev/null +++ b/iCloudEngine @@ -0,0 +1 @@ +Subproject commit 646c4424169c93da36fdf568904efeaf644af9b4 diff --git a/iCloudEngine.git.bat b/iCloudEngine.git.bat new file mode 100644 index 0000000..5a5c75f --- /dev/null +++ b/iCloudEngine.git.bat @@ -0,0 +1,2 @@ +git clone https://gitea.iconsz.com/iCON/iCloudEngine.git +pause \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..069f750 --- /dev/null +++ b/main.py @@ -0,0 +1,46 @@ +from engines.crawler_google_search import CrawlerGoogleSearch +from utils.logger import logger +import os, sys, time, traceback, json + +base_dir = os.path.dirname(os.path.abspath(__file__)) +relative_path = os.path.join(base_dir, "iCloudEngine/src") +sys.path.append(relative_path) + +from queue_processor import QueueProcessor + + +class Main(QueueProcessor): + def processor_handle(self, input): + print("input:", input) # {'keyword': '林郑月娥' } + + currentEngineId = input["currentEngineId"] + inputData = json.loads(input["inputData"]) + # keyword = inputData["keyword"] + # print("keyword:", keyword) + match currentEngineId: + case 9000: # crawler_google_search + return CrawlerGoogleSearch.process(inputData) + case 10000: # crawler_bbc_search + print(2) + case 11000: # crawler_wikipedia + print(3) + case 12000: # crawler_webb_site + print(4) + + +if __name__ == "__main__": + try: + # Main().startV2([9000]) + 
Main().startV2() + + except Exception as e: + logger.warning(f"excute exception:{e}") + exc_type, exc_value, exc_traceback = sys.exc_info() + traceback_details = traceback.extract_tb(exc_traceback) + filename, line_number, function_name, text = traceback_details[-1] + logger.error(f"Exception occurred in {filename} at line {line_number}: {e}") + logger.error(f"Function name: {function_name}") + logger.error(f"Text: {text}") + + finally: + logger.warning("application completed") diff --git a/submit_test.py b/submit_test.py new file mode 100644 index 0000000..2a67f02 --- /dev/null +++ b/submit_test.py @@ -0,0 +1,27 @@ +import os, sys + +base_dir = os.path.dirname(os.path.abspath(__file__)) +relative_path = os.path.join(base_dir, "iCloudEngine/src") +sys.path.append(relative_path) + +from queue_client import QueueClient + + +class submit_test: + def submit(code, keyword): + client = QueueClient() + returnData = client.call(code, {"keyword": keyword}) + print(returnData) + + +# 创建一个测试队列:9000 / GoogleSearch +submit_test.submit(9000, "林郑月娥") + +# 创建一个测试队列:10000 / BBCSearch +submit_test.submit(10000, "林郑月娥") + +# 创建一个测试队列:11000 / Wikipedia +submit_test.submit(11000, "林郑月娥") + +# 创建一个测试队列:12000 / WebbSite +submit_test.submit(12000, "林郑月娥") diff --git a/utils/logger.py b/utils/logger.py new file mode 100644 index 0000000..8de3ada --- /dev/null +++ b/utils/logger.py @@ -0,0 +1,42 @@ +import logging +import os +from datetime import datetime +import requests + +if not os.path.exists("logs"): + os.makedirs("logs") + +# 配置日志的基本设置 +logging.basicConfig( + filename=os.path.join("logs", datetime.now().strftime("%Y-%m-%d") + ".log"), + level=logging.WARNING, + format="%(asctime)s:%(levelname)s:%(message)s", + filemode="a", + encoding="utf-8", +) + +# 设置requests库的日志级别为WARNING或更高,以确保不记录INFO或DEBUG级别的日志 +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("elasticsearch").setLevel(logging.WARNING) + + +class 
logger: + # def debug(msg: str): + # logging.debug(msg) + + # def info(msg: str): + # logging.info(msg) + + def warning(msg, printLog=True): + logging.warning(msg) + if printLog == True: + print(msg) + + def error(msg: str, printLog=True): + logging.error(msg) + if printLog == True: + print(msg) + + # def critical(msg: str): + # logging.critical(msg)