def _decodeUtf8(value):
    """Return *value* decoded from UTF-8 if it is a byte string, else unchanged."""
    # On Python 2 ``bytes`` is ``str``, so plain strings are decoded exactly as
    # before. Text that is already unicode (or any Python 3 ``str``) is passed
    # through untouched instead of failing inside ``.decode``.
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


def distanceDomain(domain, DomainDict, ccTldDict, tldDict):
    """Find the entry of ``DomainDict`` closest to ``domain`` by edit distance.

    The comparison key is the first element of the tuple returned by
    ``extractLevelDomain(domain, ccTldDict, tldDict)`` (called ``domain2LD``
    here; presumably the second-level domain -- confirm against that helper).
    It is compared with every key of ``DomainDict`` using
    ``Levenshtein.distance``.

    Args:
        domain: Domain name to examine; it must contain at least one ".".
        DomainDict: Iterable of known domain names to compare against.
        ccTldDict: Passed through unchanged to ``extractLevelDomain``.
        tldDict: Passed through unchanged to ``extractLevelDomain``.

    Returns:
        A 2-tuple:
        * ``("not a domain", <max int>)`` when ``domain`` has no "." in it.
          The sentinel is ``sys.maxint`` where it exists, else ``sys.maxsize``.
        * ``(closest, distance / len(closest))`` for the closest candidate;
          on ties the first candidate iterated wins.
        * ``(domain2LD, 0)`` when there is no non-empty closest candidate
          (for example when ``DomainDict`` is empty).
    """
    # ``sys.maxint`` only exists on Python 2; use ``sys.maxsize`` elsewhere so
    # the sentinel stays "larger than any real distance" on both versions.
    maxInt = getattr(sys, 'maxint', sys.maxsize)

    if len(domain.split(".")) <= 1:
        return ("not a domain", maxInt)

    # Only the first element is used; the unpacking still enforces that the
    # helper returns exactly four values, as the original code did.
    domain2LD, _domain3LD, _domain2LDs, _domain3LDs = extractLevelDomain(
        domain, ccTldDict, tldDict)

    # Decode the loop-invariant side of the comparison once, not per candidate.
    target = _decodeUtf8(domain2LD)

    similarDomain = ""
    minDistance = maxInt
    for popularDomain in DomainDict:
        distance = Levenshtein.distance(target, _decodeUtf8(popularDomain))
        if distance < minDistance:
            minDistance = distance
            similarDomain = popularDomain
            if distance == 0:
                # An exact match cannot be beaten (the comparison is strict),
                # so the remaining candidates need not be scanned.
                break

    if similarDomain:
        # Normalise by the length of the stored candidate as-is (the value
        # taken from DomainDict, not its decoded form).
        return (similarDomain, minDistance / float(len(similarDomain)))
    return (domain2LD, 0)
# Check whether a domain contains an invalid TLD.
# Comment list
# Article table of contents