def get_soup(game_html):
    """
    Parse the page with Beautiful Soup and pull out the ``td`` cells matched
    by the ``'td.+.bborder'`` selector.

    No single parser copes with every page, so the parsers are tried in a
    fixed order (lxml, html.parser, html5lib) and the first attempt that
    yields at least one matching cell wins.

    :param game_html: object whose ``.text`` attribute holds the html doc

    :return: the elements returned by ``select('td.+.bborder')``; when every
             parser comes up empty this is the (empty) result of the last
             attempt
    """
    # Restrict parsing to <td> tags whose class contains "bborder".
    td_only = SoupStrainer('td', attrs={'class': re.compile(r'bborder')})

    # (parser name, extra BeautifulSoup kwargs) in the order they are tried.
    # The html5lib attempt parses the whole document without the strainer
    # (NOTE(review): presumably because html5lib ignores parse_only - confirm).
    attempts = (
        ("lxml", {"parse_only": td_only}),
        ("html.parser", {"parse_only": td_only}),
        ("html5lib", {}),
    )

    for parser_name, extra_kwargs in attempts:
        parsed = BeautifulSoup(game_html.text, parser_name, **extra_kwargs)
        cells = parsed.select('td.+.bborder')
        if cells:
            # This parser found something; no need to fall back further.
            break

    return cells
评论列表
文章目录