get_all_songs.py 文件源码-python代码片段

get_all_songs.py 文件源码

python

阅读 48 收藏 0 点赞 0 评论 0

def download_songs(url):
  time.sleep(random.random() * 0.5)
  try:
    page = urllib2.urlopen(url).read()
    soup = BeautifulSoup(page, 'html.parser')

    # Get the artist name
    artist_name = soup.findAll('h1')[0].get_text()[:-7].lower().replace(' ', '_')

    # Store all songs for a given artist
    with open('artist_data/'+artist_name+'.txt', 'wb') as w:
      for song in soup.findAll('a', {'target': '_blank'}):
        if 'lyrics/' in song['href']:
          song_url = song['href'][1:].strip()
          w.write(song_url + '\n')
  except urllib2.HTTPError:
    print '404 not found'