def get_links(url):
    """
    Collect game identifiers from the anchors of a day-listing page.

    The page is expected to look like
    gd2.mlb.com/components/game/mlb/year/month/day/
    and to link to games named
    gid_year_mm_dd_team1mlb_team2mlb

    The page is fetched with get_page(url).  If that call returns a value
    equal to False, False is returned instead of a list.  Otherwise the
    document is parsed for <a> tags carrying an href, each parsed element
    is rendered back to a string, and every double-quoted token starting
    with "gid_" (case-insensitive) contributes the text that follows the
    "gid_" prefix, up to the closing quote.

    Returns a list of those captured strings in document order (possibly
    empty), or False when the page could not be obtained.
    """
    page = get_page(url)
    # Equality (not identity) is kept on purpose so that whatever falsy
    # sentinel get_page uses is treated exactly as before.
    if page == False:  # noqa: E712
        return False

    # Compiled once, ahead of the iteration over the anchors.
    gid_pattern = re.compile("\"gid_(.*?)\"", re.IGNORECASE)

    # Restrict parsing to anchors that actually have an href attribute.
    anchors = BeautifulSoup(
        page, "lxml", parse_only=SoupStrainer('a', href=True)
    )

    # findall returns only the captured group, so the "gid_" prefix and the
    # surrounding quotes are not part of the results.
    return [
        game_id
        for anchor in anchors
        for game_id in gid_pattern.findall(str(anchor))
    ]
评论列表
文章目录