def _paragraph_to_text(markup):
    """Turn the markup of one paragraph into plain text.

    ``<br>`` variants become newlines, every remaining tag is removed, and
    each newline is then preceded by a single space.
    """
    text = re.sub(r'</?br/?>', '\n', markup)
    # Non-greedy match so each tag is removed individually.
    text = re.sub(r'<.*?>', '', text)
    return text.replace('\n', ' \n')


def get_lyrics_with_urls(urls):
    """Download each URL and collect the text of its styled ``<p>`` elements.

    For every URL the page is fetched (5 second timeout) and parsed with
    ``bs`` restricted to ``<p>`` elements. Each element carrying a ``style``
    attribute is converted to plain text, appended to the result, and
    printed together with ``get_sentiment`` of that text.

    Args:
        urls: Iterable of URLs to fetch.

    Returns:
        A list with one cleaned-up text string per matching ``<p>`` element,
        in the order encountered.

    Nothing is caught here, so errors raised by ``urlopen`` or the parser
    propagate to the caller.
    """
    # TODO (carried over from the original; no detail was recorded)
    collected = []
    for url in urls:
        # Pause before every request -- presumably to avoid hammering the
        # remote site; confirm before changing the delay.
        time.sleep(3)
        print(url)

        response = urlopen(url, timeout=5)
        try:
            content = response.read()
        finally:
            # Always release the connection, even when the read fails.
            response.close()

        paragraphs = bs(content, "html.parser", parse_only=SoupStrainer('p'))
        for paragraph in paragraphs:
            if not paragraph.has_attr('style'):
                continue
            lyrics = _paragraph_to_text(str(paragraph))
            collected.append(lyrics)
            print(lyrics)
            print(str(get_sentiment(lyrics)))
    return collected
评论列表
文章目录