1
0
Fork 0
CrawlerEngines/main.py

54 lines
2.0 KiB
Python

from engines.crawler_google_search import CrawlerGoogleSearch
from engines.crawler_newscn_search import Crawler_NewsCN
from engines.info_extractor import InfoExtractor
from utils.logger import logger
import os, sys, time, traceback, json
# Absolute directory that contains this script.
base_dir = os.path.dirname(os.path.abspath(__file__))
# Despite its name this is an absolute path: <script dir>/iCloudEngine/src.
relative_path = os.path.join(base_dir, "iCloudEngine/src")
# Extend the import search path before the import below; presumably
# queue_processor lives in iCloudEngine/src -- verify against the repo layout.
sys.path.append(relative_path)
from queue_processor import QueueProcessor
# Engine instances are created once at import time and reused by
# Main.processor_handle for every message.
infoExtractor = InfoExtractor()
crawlerGoogleSearch = CrawlerGoogleSearch()
crawler_NewsCN = Crawler_NewsCN()
class Main(QueueProcessor):
def processor_handle(self, input):
print("input:", input) # {'keyword': '林郑月娥' }
currentEngineId = input["currentEngineId"]
inputData = json.loads(input["inputData"])
match currentEngineId:
case 3000: # InfoExtractor 实体信息提取
return infoExtractor.process(inputData)
case 9000: # crawler_google_search google在线所搜
return crawlerGoogleSearch.process(inputData)
case 10000: # crawler_bbc_search bbc在线所搜
print(2)
case 11000: # crawler_wikipedia
print(3)
case 12000: # crawler_webb_site
print(4)
case 13000: # crawler_NewsCN 新华网英文站在线搜索
return crawler_NewsCN.process(inputData)
if __name__ == "__main__":
    # Script entry point: start the queue worker and log any exception that
    # escapes it instead of letting it propagate.
    try:
        # Alternative invocation kept from the original: Main().startV2([9000])
        Main().startV2()
    except Exception as e:
        logger.warning(f"excute exception:{e}")
        # The last traceback entry is the innermost frame, i.e. where the
        # exception was actually raised.
        last_frame = traceback.extract_tb(e.__traceback__)[-1]
        logger.error(
            f"Exception occurred in {last_frame.filename} "
            f"at line {last_frame.lineno}: {e}"
        )
        logger.error(f"Function name: {last_frame.name}")
        logger.error(f"Text: {last_frame.line}")
    finally:
        # Emitted on both the success and the failure path.
        logger.warning("application completed")