# get_lyrics.py -- source file (Python code snippet)

def lyricswikia(artist, song):
    """Fetch the lyrics of ``song`` by ``artist`` from lyrics.wikia.com.

    The page URL is built as ``http://lyrics.wikia.com/<artist>:<song>``
    with spaces replaced by underscores.  Anything following the first
    ``' - '`` in ``song`` (e.g. a "- Remastered" suffix) is dropped before
    the URL is built.

    Args:
        artist: Artist name as plain text.
        song: Song title as plain text.

    Returns:
        The lyrics as a ``str`` with one line per ``<br/>`` in the page, or
        ``None`` when the page contains no ``<div class="lyricbox">``.

    Raises:
        Whatever ``requests.get`` raises on network failure; nothing is
        caught here.
    """
    # original code found @
    # https://github.com/geekpradd/PyLyrics/blob/master/PyLyrics/functions.py
    song = song.split(' - ', 1)[0]
    artist = artist.replace(' ', '_')
    song = song.replace(' ', '_')
    url = 'http://lyrics.wikia.com/{0}:{1}'.format(artist, song)
    print('Trying:', url)
    r = requests.get(url)
    s = BeautifulSoup(r.text, 'html.parser')

    # Get main lyrics holder
    lyrics = s.find("div", {'class': 'lyricbox'})
    if lyrics is None:
        return None

    # Remove scripts.  Each <script> element itself must be extracted; the
    # previous code extracted the whole soup, leaving the scripts in place.
    for script in lyrics('script'):
        script.extract()

    # Remove HTML comments
    comments = lyrics.find_all(string=lambda text: isinstance(text, Comment))
    for comment in comments:
        comment.extract()

    # Remove unnecessary tags but keep their text content
    for tag in ['div', 'i', 'b', 'a']:
        for match in lyrics.find_all(tag):
            match.unwrap()

    # Serialise only the children of the lyrics box.  This replaces a fixed
    # [22:-6] slice that assumed the wrapper was exactly
    # '<div class="lyricbox">' ... '</div>' and broke on any extra attribute.
    markup = lyrics.decode_contents()
    # Round-trip through UTF-8 so characters that cannot be encoded
    # (e.g. lone surrogates) are replaced instead of failing later.
    markup = markup.encode('utf-8', errors='replace').decode('utf-8')
    # Source newlines are layout only; <br/> marks the real line breaks.
    return markup.replace('\n', '').replace('<br/>', '\n')