utils.py 文件源码-python代码片段

utils.py 文件源码

python

阅读 32 收藏 0 点赞 0 评论 0

def get_title_from_webpage(url):
    """Fetch the text of the <title> element of a web page.

    TLS certificate verification is deliberately skipped for this request,
    so pages served with invalid or self-signed certificates can still be
    read. Only this request is affected; the process-wide SSL defaults are
    left untouched.

    :url: str (http url)
    :returns: str -- the title with newlines replaced by spaces, carriage
        returns removed and surrounding whitespace stripped; the literal
        "No title" when the page cannot be fetched or has no usable
        <title>; False when a ValueError is raised while handling the
        request (e.g. a URL that urllib2 rejects as malformed).
    """

    # Build a per-request unverified context instead of assigning to
    # ssl._create_default_https_context, which would silently disable
    # certificate checks for every HTTPS connection in the process.
    unverified_context = ssl._create_unverified_context()

    # Browser-like User-Agent sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) '
                      'AppleWebKit/601.3.9 (KHTML, like Gecko) '
                      'Version/9.0.2 Safari/601.3.9'
    }

    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request, context=unverified_context)
        try:
            soup = BeautifulSoup(response, "html.parser")
        finally:
            # Always release the connection, even if parsing fails.
            response.close()
        # A missing <title> (or one without a single string child) yields
        # None here; the resulting AttributeError is handled below.
        title = soup.title.string.replace('\n', ' ').replace('\r', '')
        return title.strip()
    except (AttributeError, MemoryError, ssl.CertificateError, IOError):
        return "No title"
    except ValueError:
        # Kept for backward compatibility: callers may test for False.
        return False