"""Fetch a truncated Wikipedia summary for a keyword in a given language."""

import traceback

import wikipedia
import wikipediaapi

from utils.logger import logger

MAX_SEARCH_RESULT = 1
# Upper bound on the number of summary characters returned per page.
NUMBER_CHARS_RETURN = 10000


class Crawler_Wikipedia:
    """Crawler that resolves a keyword to a Wikipedia page and returns its summary."""

    def __search_topic_and_save(self, keyword, wiki_wiki):
        """Resolve *keyword* to a page title and return that page's summary.

        The title is resolved with the ``wikipedia`` package; the summary is
        then read through the ``wikipediaapi`` client passed in.

        Args:
            keyword: Search term to look up.
            wiki_wiki: A ``wikipediaapi.Wikipedia`` client.

        Returns:
            The first ``NUMBER_CHARS_RETURN`` characters of the page summary.

        Raises:
            Any exception other than ``DisambiguationError`` raised by
            ``wikipedia.page`` propagates to the caller.
        """
        try:
            # Direct hit: use the title the lookup resolved to.
            title = wikipedia.page(keyword).title
        except wikipedia.exceptions.DisambiguationError as e:
            # Ambiguous keyword: take the first suggested page title.
            # Fall back to the keyword itself if no options were offered.
            title = e.options[0] if e.options else keyword

        page_py = wiki_wiki.page(title)
        return page_py.summary[0:NUMBER_CHARS_RETURN]

    def search_and_save(self, keyword, lang):
        """Return a one-element list holding the summary for *keyword*.

        Args:
            keyword: Search term to look up.
            lang: Wikipedia language code, e.g. ``"en"`` or ``"zh"``.

        Returns:
            A list containing a single summary string.
        """
        wikipedia.set_lang(lang)
        # NOTE(review): the first positional argument is presumably the
        # user-agent string expected by recent wikipedia-api releases --
        # confirm against the installed version.
        wiki_wiki = wikipediaapi.Wikipedia('icon (icon@iconsz.com)', language=lang)
        content_list = [self.__search_topic_and_save(keyword, wiki_wiki)]
        logger.warning(f"Crawler_Wikipedia topic:{keyword}")
        return content_list

    def process(self, inputData):
        """Run the crawler for one request and never raise on lookup failure.

        Args:
            inputData: Mapping with the keys ``"keyword"`` and ``"lang"``.

        Returns:
            The list produced by ``search_and_save``, or an empty list if the
            lookup raised an exception (the traceback is logged).

        Raises:
            KeyError: If ``"keyword"`` or ``"lang"`` is missing from
                *inputData*.
        """
        logger.warning(f"Crawler_Wikipedia / inputData {inputData}")
        keyword = inputData["keyword"]
        # lang: en for English, zh for Chinese
        lang = inputData["lang"]

        result = []
        try:
            result = self.search_and_save(keyword, lang)
        except Exception:
            # Best-effort boundary: log the failure and return an empty result.
            logger.warning(f"Crawler_Wikipedia {traceback.format_exc()}")

        logger.warning(
            f"Crawler_Wikipedia process completed】, keyword={keyword}, result len={len(result)}"
        )
        return result


if __name__ == "__main__":
    crawler = Crawler_Wikipedia()
    crawler.search_and_save('prc', 'en')