import_data.py 文件源码-python代码片段

import_data.py 文件源码

python

阅读 25 收藏 0 点赞 0 评论 0

项目：find-that-charity 作者: TechforgoodCAST 项目源码文件源码

def parse_url(url):
    """Clean up a free-text website value and return a usable URL.

    The value is stripped of surrounding whitespace and accepted as soon as
    it, or the same text prefixed with ``http://``, satisfies
    ``validators.url``.  Known "no website" placeholders give ``None``.
    Otherwise common typos in the scheme and in the ``www.`` prefix are
    rewritten, commas become dots, each ``..`` pair becomes a single dot,
    and validation is attempted once more.

    Args:
        url: Raw website text, or ``None``.

    Returns:
        The accepted URL string (possibly with ``http://`` prepended), or
        ``None`` when ``url`` is ``None``, is a placeholder, or still fails
        validation after the rewrites.
    """
    def first_valid(text):
        # Try the text unchanged first, then with an http:// scheme added.
        for attempt in (text, "http://%s" % text):
            if validators.url(attempt):
                return attempt
        return None

    if url is None:
        return None

    url = url.strip()

    accepted = first_valid(url)
    if accepted is not None:
        return accepted

    # Values that people type in to mean "there is no website".
    placeholders = [
        "n.a", 'non.e', '.0', '-.-', '.none', '.nil', 'N/A', 'TBC',
        'under construction', '.n/a', '0.0', '.P', b'', 'no.website',
    ]
    if url in placeholders:
        return None

    # Ordered rewrite table: every needle in a row is replaced by that row's
    # substitute.  Rows and needles are applied strictly in the order given,
    # because later rewrites act on the output of earlier ones.
    rewrites = (
        (('http;//', 'http//', 'http.//', 'http:\\\\',
          'http://http://', 'www://', 'www.http://'), 'http://'),
        (('http:/www',), 'http://www'),
        (('www,', ':www', 'www:', 'www/', 'www\\\\', '.www'), 'www.'),
        ((',',), '.'),
        (('..',), '.'),
    )
    for needles, substitute in rewrites:
        for needle in needles:
            url = url.replace(needle, substitute)

    accepted = first_valid(url)
    if accepted is not None:
        return accepted