diff --git a/engines/crawler_newscn_search.py b/engines/crawler_newscn_search.py index 21f865f..45195df 100644 --- a/engines/crawler_newscn_search.py +++ b/engines/crawler_newscn_search.py @@ -161,11 +161,18 @@ class Crawler_NewsCN: WebDriverWait(self.driver, 10).until(EC.title_contains("Xinhua")) # time.sleep(1) - region_code = urllib.parse.urlparse(url).path.split("/")[1] - if region_code in XINHUA_OVERSEAS_REGIONS: - blog = self.__retrieve_overseas_blog() - else: + # 特殊情况:https://german.news.cn/20241016/93ca92839e1b44dc8f6dca21f9c80902/c.html + # region_code = urllib.parse.urlparse(url).path.split("/")[1] + # if region_code in XINHUA_OVERSEAS_REGIONS: + # blog = self.__retrieve_overseas_blog() + # else: + # blog = self.__retrieve_china_blog() + + if self.driver.find_elements(By.CLASS_NAME, "conBox"): blog = self.__retrieve_china_blog() + else: + if self.driver.find_elements(By.CLASS_NAME, "main.clearfix"): + blog = self.__retrieve_overseas_blog() # div = WebDriverWait(self.driver, 10).until( # EC.presence_of_element_located((By.CLASS_NAME, "detailContent")) diff --git a/submit_test.py b/submit_test.py index b578811..1bd9d21 100644 --- a/submit_test.py +++ b/submit_test.py @@ -20,8 +20,8 @@ class submit_test: # # # 创建一个测试队列:10000 / BBCSearch submit_test.submit(10000, {"keyword": "习近平"}) submit_test.submit(10000, {"keyword": "US election"}) -# submit_test.submit(10000, {"keyword": "US election"}) -# submit_test.submit(10000, {"keyword": "US election"}) +submit_test.submit(10000, {"keyword": "Yahya Sinwar"}) +submit_test.submit(10000, {"keyword": "Bin Laden"}) # # 创建一个测试队列:11000 / Wikipedia # submit_test.submit(11000, {"keyword": keyword}) @@ -31,4 +31,8 @@ submit_test.submit(10000, {"keyword": "US election"}) # 创建一个测试队列:13000 / Crawler_NewsCN 新华网英文站在线搜索 submit_test.submit(13000, {"keyword": "china", "lang": "en"}) +submit_test.submit(13000, {"keyword": "fujian", "lang": "en"}) +submit_test.submit(13000, {"keyword": "shenzhen", "lang": "en"}) submit_test.submit(13000, {"keyword": "中国", "lang": "cn"}) +submit_test.submit(13000, {"keyword": "吴邦国", "lang": "cn"}) +submit_test.submit(13000, {"keyword": "李强", "lang": "cn"})