def gather():
    """Fetch the PhishTank online-valid feed and pretty-print one record per row.

    The bz2-compressed CSV at ``base_url`` is downloaded with ``get_url``,
    decompressed, and parsed as CSV. The first row is skipped as the header.
    For every remaining row the second column is treated as the phishing URL;
    its host is extracted and passed to ``get_ip``. When ``get_ip`` yields the
    string ``"undefined"`` the whois and country fields are set to
    ``"undefined"`` as well, otherwise they come from
    ``get_who_is_and_country``. Each resulting record is printed with
    ``pprint``.

    Rows with fewer than two columns, and rows whose URL column contains no
    extractable host, are skipped instead of aborting the whole run.

    Returns:
        None. The only output is the pretty-printed records.
    """
    # Local import so this block does not depend on the file's import header.
    import csv

    # The scheme prefix is matched lazily (``.*?``): a greedy ``.*`` would run
    # to the LAST "://" in the URL, so a URL embedding a redirect target
    # (``http://a.example/?u=http://b.example``) would report the wrong host.
    # Compiled once because it is applied to every row.
    url_regex = re.compile(
        r'(?:http.*?://)?(?P<host>[^:/ ]+).?(?P<port>[0-9]*).*'
    )
    base_url = "http://data.phishtank.com/data/online-valid.csv.bz2"
    attack_type = "undefined"

    res = get_url(base_url)
    payload = bz2.decompress(res.content)
    # On Python 3 the decompressed payload is ``bytes``; splitting it with a
    # text separator raises TypeError, so decode it first. On Python 2 the
    # payload is already ``str`` and is left untouched.
    if not isinstance(payload, str):
        payload = payload.decode("utf-8", "replace")

    # csv.reader honours quoted fields; a plain ``split(",")`` would leave a
    # leading quote on quoted URLs and break URLs that contain commas.
    rows = csv.reader(payload.split("\n"))
    next(rows, None)  # drop the header row
    for row in rows:
        if len(row) < 2:
            # Blank line or truncated row: there is no URL column to read.
            continue
        site_url = row[1]
        m = url_regex.search(site_url)
        if m is None:
            # Nothing that looks like a host (e.g. an empty URL field).
            continue
        host = m.group('host')
        ip_address = get_ip(host)
        if ip_address == "undefined":
            who_is, country = "undefined", "undefined"
        else:
            who_is, country = get_who_is_and_country(ip_address)
        doc = {
            'IP': ip_address,
            'SourceInfo': base_url,
            'Type': attack_type,
            'Country': country,
            'Domain': host,
            # NOTE(review): 'URL' carries the host, not the full site_url.
            # Kept as-is to preserve the record shape; confirm the intent.
            'URL': host,
            'WhoIsInfo': who_is,
        }
        pprint(doc)
phishtank.py 文件源码
python
阅读 20
收藏 0
点赞 0
评论 0
评论列表
文章目录