def download_songs(url):
    """Download an artist page and save its lyrics links to a text file.

    The page at ``url`` is fetched and parsed with BeautifulSoup. The artist
    name is derived from the text of the first ``<h1>``: the last 7
    characters are dropped (presumably a trailing " Lyrics" suffix --
    confirm against the target site), the remainder is lower-cased and
    spaces are replaced with underscores. Every ``<a target="_blank">``
    whose ``href`` contains ``lyrics/`` is then written, one per line and
    with its first character removed, to ``artist_data/<artist_name>.txt``.

    The ``artist_data`` directory is not created here and must already
    exist. An existing file for the same artist is overwritten.

    Args:
        url: Address of the artist page to download.

    Returns:
        None. On an HTTP error a message is printed and nothing is written.

    Raises:
        IndexError: If the page contains no ``<h1>`` element.
    """
    # Pause for a random 0-0.5 s before issuing the request.
    time.sleep(random.random() * 0.5)

    # Only the request itself can raise HTTPError, so keep the try minimal.
    try:
        response = urllib2.urlopen(url)
    except urllib2.HTTPError as err:
        # Single-argument print(...) prints the same text under Python 2.
        if err.code == 404:
            print('404 not found')
        else:
            # Do not mislabel other failures (403, 500, ...) as a 404.
            print('HTTP error %d' % err.code)
        return

    # Always release the connection, even if reading fails.
    try:
        page = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(page, 'html.parser')

    # Get the artist name
    heading = soup.findAll('h1')[0].get_text()
    artist_name = heading[:-7].lower().replace(' ', '_')
    # A path separator in the name (e.g. "AC/DC") would point into a
    # sub-directory that does not exist, so flatten it into the file name.
    artist_name = artist_name.replace('/', '_').replace('\\', '_')

    # Store all songs for a given artist
    with open('artist_data/' + artist_name + '.txt', 'wb') as w:
        for song in soup.findAll('a', {'target': '_blank'}):
            # Anchors without an href are skipped instead of raising KeyError.
            href = song.get('href', '')
            if 'lyrics/' in href:
                # Drop the first character of the link before storing it.
                w.write(href[1:].strip() + '\n')
评论列表
文章目录