# urlextract.py source file - Python code snippet

def _download_tlds_list(self):
        """
        Function downloads list of TLDs from IANA 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'.

        :return: True if list was downloaded, False in case of an error
        :rtype: bool
        """
        url_list = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'

        # check if we can write cache file
        if os.access(self._tld_list_path, os.F_OK) and not os.access(self._tld_list_path, os.W_OK):
            print("ERROR: Cache file is not writable for current user. ({})".format(self._tld_list_path))
            return False

        req = urllib.request.Request(url_list)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0')
        with open(self._tld_list_path, 'w') as ftld:
            try:
                with urllib.request.urlopen(req) as f:
                    page = f.read().decode('utf-8')
                    ftld.write(page)
            except HTTPError as e:
                print("ERROR: Can not download list ot TLDs. (HTTPError: {})".format(e.reason))
                return False
            except URLError as e:
                print("ERROR: Can not download list ot TLDs. (URLError: {})".format(e.reason))
                return False
        return True