def reduce_domains(domains, tlds=None):
    """Collapse domains to their shortest non-TLD suffix and drop invalid ones.

    For example, with 'com' in the TLD list, 'www.google.com' is reduced to
    'google.com'.  Multi-label suffixes are honoured when they are listed:
    with both 'uk' and 'co.uk' in the list, 'a.example.co.uk' is reduced to
    'example.co.uk'.

    Args:
        domains: iterable of domain name strings.
        tlds: optional iterable of known TLD / suffix strings.  When omitted
            (the default), the list is read from the 'resources/tld.txt'
            resource of the 'gfwlist2pac' package, one entry per line.

    Returns:
        A set of reduced domain strings.  A domain whose last label is not a
        listed TLD is considered invalid and left out.  A domain for which
        every suffix (including the domain itself) is a listed TLD is kept
        unchanged.
    """
    if tlds is None:
        tld_content = pkgutil.get_data('gfwlist2pac', 'resources/tld.txt')
        # get_data() yields bytes on Python 3; decode so that the entries
        # compare equal to the text domains.  On Python 2 the data is already
        # a str and is used untouched.
        if not isinstance(tld_content, str):
            tld_content = tld_content.decode('utf-8')
        tlds = set(tld_content.splitlines())
    else:
        # Accept any iterable and guarantee fast membership tests.
        tlds = set(tlds)

    new_domains = set()
    for domain in domains:
        domain_parts = domain.split('.')
        # The right-most label must itself be a valid TLD.
        if domain_parts[-1] not in tlds:
            continue
        # Grow the suffix one label at a time (right to left) and stop at
        # the first one that is not a listed TLD.  If every suffix is
        # listed, fall back to the domain itself.
        reduced = domain
        for start in range(len(domain_parts) - 2, -1, -1):
            candidate = '.'.join(domain_parts[start:])
            if candidate not in tlds:
                reduced = candidate
                break
        new_domains.add(reduced)
    return new_domains
评论列表
文章目录