1
0
Fork 0
dev
million 2024-10-18 21:39:46 +08:00
parent 36353926de
commit 3d0e534375
2 changed files with 17 additions and 6 deletions

View File

@ -161,11 +161,18 @@ class Crawler_NewsCN:
WebDriverWait(self.driver, 10).until(EC.title_contains("Xinhua"))
# time.sleep(1)
region_code = urllib.parse.urlparse(url).path.split("/")[1]
if region_code in XINHUA_OVERSEAS_REGIONS:
blog = self.__retrieve_overseas_blog()
else:
# Special case: https://german.news.cn/20241016/93ca92839e1b44dc8f6dca21f9c80902/c.html -- here the first path segment is a date, not a region code.
# region_code = urllib.parse.urlparse(url).path.split("/")[1]
# if region_code in XINHUA_OVERSEAS_REGIONS:
# blog = self.__retrieve_overseas_blog()
# else:
# blog = self.__retrieve_china_blog()
if self.driver.find_elements(By.CLASS_NAME, "conBox"):
blog = self.__retrieve_china_blog()
else:
if self.driver.find_elements(By.CLASS_NAME, "main.clearfix"):
blog = self.__retrieve_overseas_blog()
# div = WebDriverWait(self.driver, 10).until(
# EC.presence_of_element_located((By.CLASS_NAME, "detailContent"))

View File

@ -20,8 +20,8 @@ class submit_test:
# # # Create a test queue: 10000 / BBCSearch
submit_test.submit(10000, {"keyword": "习近平"})
submit_test.submit(10000, {"keyword": "US election"})
# submit_test.submit(10000, {"keyword": "US election"})
# submit_test.submit(10000, {"keyword": "US election"})
submit_test.submit(10000, {"keyword": "Yahya Sinwar"})
submit_test.submit(10000, {"keyword": "Bin Laden"})
# # Create a test queue: 11000 / Wikipedia
# submit_test.submit(11000, {"keyword": keyword})
@ -31,4 +31,8 @@ submit_test.submit(10000, {"keyword": "US election"})
# Create a test queue: 13000 / Crawler_NewsCN (online search of the Xinhuanet English site)
submit_test.submit(13000, {"keyword": "china", "lang": "en"})
submit_test.submit(13000, {"keyword": "fujian", "lang": "en"})
submit_test.submit(13000, {"keyword": "shenzhen", "lang": "en"})
submit_test.submit(13000, {"keyword": "中国", "lang": "cn"})
submit_test.submit(13000, {"keyword": "吴邦国", "lang": "cn"})
submit_test.submit(13000, {"keyword": "李强", "lang": "cn"})