def parse(self, html):
    """
    Parse an HTML document and report each titled article it contains.

    The markup is parsed with Beautiful Soup's built-in "html.parser"
    backend and the resulting tree is stored on ``self.soup``. Page-wide
    attributes are handled first via ``self._parse_globals()``. Then every
    element accepted by ``ScholarArticleParser._tag_results_checker`` is
    passed to ``self._parse_article()``, followed by
    ``self._clean_article()``. If ``self.article['title']`` is truthy
    afterwards, ``self.handle_article(self.article)`` is invoked.

    :param html: markup to parse, passed unchanged to ``BeautifulSoup``.
    :returns: None; results are delivered through ``handle_article``.
    """
    self.soup = BeautifulSoup(html, "html.parser")

    # Parse any global, non-itemized attributes from the page.
    self._parse_globals()

    # Walk the listed articles. find_all() is the bs4 spelling of the
    # legacy findAll() alias and accepts the same filter callable.
    for result_tag in self.soup.find_all(
            ScholarArticleParser._tag_results_checker):
        self._parse_article(result_tag)
        self._clean_article()

        # NOTE(review): presumably _parse_article() (re)populates
        # self.article for the current tag -- confirm against its
        # definition. Entries without a title are not reported.
        if not self.article['title']:
            continue
        self.handle_article(self.article)
评论列表
文章目录