1
0
Fork 0
CrawlerEngines/main.py

58 lines
2.1 KiB
Python

from engines.crawler_google_search import CrawlerGoogleSearch
from engines.crawler_newscn_search import Crawler_NewsCN
from engines.info_extractor import InfoExtractor
from engines.crawler_bbc_search import Crawler_BBCSearch
from utils.logger import logger
import os, sys, time, traceback, json
from utils.logger import logger
# Extend the module search path with <directory of this file>/iCloudEngine/src.
# NOTE(review): presumably this is where `queue_processor` (imported right
# after this) lives, so this must run before that import — confirm.
base_dir = os.path.dirname(os.path.abspath(__file__))
relative_path = os.path.join(base_dir, "iCloudEngine/src")
sys.path.append(relative_path)
from queue_processor import QueueProcessor
class Main(QueueProcessor):
def processor_handle(self, input):
print("input:", input)
currentEngineId = input["currentEngineId"]
inputData = json.loads(input["inputData"])
match currentEngineId:
case 3000: # InfoExtractor 实体信息提取
return InfoExtractor().process(inputData)
case 9000: # crawler_google_search google在线搜索
return CrawlerGoogleSearch().process(inputData)
case 10000: # crawler_bbc_search bbc在线搜索
return Crawler_BBCSearch().process(inputData)
case 11000: # crawler_wikipedia
print(3)
case 12000: # crawler_webb_site
print(4)
case 13000: # crawler_NewsCN 新华网英文站在线搜索
# crawler_NewsCN = Crawler_NewsCN()
return Crawler_NewsCN().process(inputData)
if __name__ == "__main__":
    # Script entry point: run the queue processor and log start, failure
    # details, and completion.
    logger.warning("application start")
    try:
        # NOTE(review): startV2 apparently also accepts a list of engine ids,
        # e.g. Main().startV2([9000]) — confirm against QueueProcessor.
        Main().startV2()
    except Exception as e:
        logger.warning(f"excute exception:{e}")
        # Report the innermost frame of the traceback, i.e. where the
        # exception was actually raised.
        innermost_frame = traceback.extract_tb(e.__traceback__)[-1]
        filename, line_number, function_name, text = innermost_frame
        logger.error(f"Exception occurred in {filename} at line {line_number}: {e}")
        logger.error(f"Function name: {function_name}")
        logger.error(f"Text: {text}")
        # The full traceback was previously labelled "Crawler_NewsCN", which
        # misattributed failures coming from any other engine.
        logger.warning(f"application exception traceback: {traceback.format_exc()}")
    finally:
        logger.warning("application completed")