forked from iCON/CrawlerEngines
dev
parent
576340ef20
commit
680ff44218
Binary file not shown.
|
|
@ -7,6 +7,7 @@ import urllib.parse
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import os
|
||||||
|
|
||||||
BASE_URL = "https://english.news.cn"
|
BASE_URL = "https://english.news.cn"
|
||||||
XINHUA_OVERSEAS_REGIONS = ["asiapacific", "europe", "africa", "northamerica"]
|
XINHUA_OVERSEAS_REGIONS = ["asiapacific", "europe", "africa", "northamerica"]
|
||||||
|
|
@ -116,7 +117,7 @@ class Crawler_NewsCN:
|
||||||
invalid_chars_pattern = r'[\\/:*?"<>|]'
|
invalid_chars_pattern = r'[\\/:*?"<>|]'
|
||||||
blog_title = re.sub(invalid_chars_pattern, "", blog_title)
|
blog_title = re.sub(invalid_chars_pattern, "", blog_title)
|
||||||
|
|
||||||
file = open(f"./saved_articles/Xinhua_{blog_title}.json", "w")
|
file = open(os.path.join("", "Xinhua_{blog_title}.json"), "w")
|
||||||
json.dump(blog, file)
|
json.dump(blog, file)
|
||||||
file.close()
|
file.close()
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue