def download_lyrics(artist, url):
print url
time.sleep(random() + 2)
page = urllib2.urlopen(url).read()
soup = BeautifulSoup(page, 'html.parser')
# Get the song title
song_title = soup.find('title').get_text().split(' - ')[1].lower().replace('/', ' ').replace(' ', '_')
# Get the lyrics div
lyrics = soup.findAll('div', {'class': ''})
for i in lyrics:
lyrics = i.get_text().strip()
if len(lyrics) > 10:
with open('artists/' + artist + '/' + song_title + '.txt', 'wb') as w:
cleaned_lyrics = lyrics.replace('\r\n', ' *BREAK* ').replace('\n', ' *BREAK* ').replace(' ', ' ')
w.write(cleaned_lyrics.encode('utf-8'))
评论列表
文章目录