xtr.py 文件源码-python代码片段

xtr.py 文件源码

python

阅读 27 收藏 0 点赞 0 评论 0

项目：python-search-engine 作者: ncouture 项目源码文件源码

def parse_that(url, timeout=None):
    """Fetch *url* and gather the page's fields into a single dict.

    The response body is parsed with ``get_etree`` and the resulting tree
    is handed to the module's extraction helpers (title, links, meta
    description, cleaned text).

    Args:
        url: Address of the page to download; also passed to ``get_links``
            and ``get_url_keywords``.
        timeout: Forwarded unchanged to ``requests.get``. The default
            ``None`` keeps the previous behaviour (no timeout); pass a
            number of seconds to avoid blocking forever on a stalled
            server.

    Returns:
        A dict with the keys ``rank`` (always ``0``), ``title``, ``url``,
        ``description``, ``keywords``, ``raw`` (the response text),
        ``text``, ``internal_links`` and ``external_links``. The last two
        are taken from the ``'internal'`` and ``'external'`` entries of
        the value returned by ``get_links``.

    Raises:
        Whatever ``requests.get`` or the helpers raise; nothing is caught
        here. Note that the HTTP status code is not checked.
    """
    resp = requests.get(url, timeout=timeout)
    raw = resp.text
    tree = get_etree(raw)
    title = doctitle(tree)
    links = get_links(tree, url)
    keywords = get_url_keywords(url)
    meta_description = meta_name_description(tree)
    # NOTE(review): the result of this call was never used by the original
    # code. The call is kept, in the same position, in case get_clean_html
    # modifies the tree in place -- confirm and delete it if the helper is
    # pure.
    get_clean_html(tree)
    text_content = get_clean_html(tree, text_only=True)
    return {'rank': 0,
            'title': title,
            'url': url,
            'description': meta_description,
            'keywords': keywords,
            'raw': raw,
            'text': text_content,
            'internal_links': links['internal'],
            'external_links': links['external']}