get_song_lyrics.py 文件源码-python代码片段

get_song_lyrics.py 文件源码

python

阅读 33 收藏 0 点赞 0 评论 0

def download_lyrics(artist, url):
  """Download one lyrics page and save its text under ``artists/<artist>/``.

  The page at ``url`` is fetched and parsed, a file name is derived from the
  page's ``<title>``, and the text of each candidate ``div`` longer than 10
  characters is written to ``artists/<artist>/<song_title>.txt`` as UTF-8
  with line breaks replaced by the marker `` *BREAK* ``.

  Every qualifying ``div`` is written to the same path in ``'wb'`` mode, so
  when several qualify the file ends up holding the last one.

  Args:
    artist: Name of the sub-directory of ``artists/`` to write into. The
      directory is not created here; ``open`` fails if it is missing.
    url: Address of the lyrics page to fetch.

  Returns:
    None. The result is the file written to disk.

  Raises:
    IndexError: If the page title contains no `` - `` separator.
    Also propagates whatever ``urllib2.urlopen`` and ``open`` raise, and
    fails if the page has no ``<title>`` element at all.
  """
  print(url)

  # Throttle requests: wait 2 seconds plus whatever `random()` returns
  # (presumably random.random, i.e. a float in [0, 1) -- confirm at import).
  time.sleep(random() + 2)

  response = urllib2.urlopen(url)
  try:
    page = response.read()
  finally:
    # Always release the connection, even when the read fails.
    response.close()
  soup = BeautifulSoup(page, 'html.parser')

  # Get the song title: second ' - '-separated segment of <title>, lowercased,
  # with '/' and spaces turned into '_' so it is usable as a file name.
  song_title = soup.find('title').get_text().split(' - ')[1].lower().replace('/', ' ').replace(' ', '_')

  # Get the candidate lyrics divs (those selected by an empty class filter).
  candidate_divs = soup.findAll('div', {'class': ''})

  for div in candidate_divs:
    text = div.get_text().strip()
    # Skip near-empty divs; only blocks longer than 10 characters are saved.
    if len(text) > 10:
      with open('artists/' + artist + '/' + song_title + '.txt', 'wb') as w:
        # Flatten to a single line, marking original line breaks, then
        # collapse double spaces (single pass only).
        cleaned_lyrics = text.replace('\r\n', ' *BREAK* ').replace('\n', ' *BREAK* ').replace('  ', ' ')
        w.write(cleaned_lyrics.encode('utf-8'))