def parse_page(self, response):
    """Yield one checked proxy item for each usable row of the proxy table.

    Each row is expected to carry the IP inside a ``document.write('...')``
    call in its second cell, the port (hexadecimal) inside a
    ``gp.dep('...')`` call in its third cell, and the country as the text
    of its fifth cell. Rows that do not match this layout are skipped
    rather than aborting the whole page.

    Args:
        response: The response whose ``//table/tr`` rows are parsed.

    Yields:
        Whatever ``item.status_check(item)`` returns for each populated
        ``ProxyfetcherItem``.
    """
    import logging

    log = logging.getLogger(__name__)

    # The first two rows are skipped (presumably header rows -- confirm
    # against the live page if the layout changes).
    for row in response.xpath("//table/tr")[2:]:
        cells = row.xpath("td")
        if len(cells) < 3:
            log.debug("Skipping proxy table row with fewer than 3 cells")
            continue

        # Raw strings keep the patterns identical to the originals while
        # avoiding the invalid "\(" escape in a normal string literal.
        ip_matches = cells[1].re(r"document.write\('(.+?)'")
        port_matches = cells[2].re(r"gp.dep\('(.+?)'")
        country_texts = row.xpath("td[5]/text()").extract()
        if not (ip_matches and port_matches and country_texts):
            log.debug("Skipping proxy table row with missing ip/port/country")
            continue

        # The port is "encoded" as hexadecimal.
        try:
            port = int(port_matches[0], 16)
        except ValueError:
            log.debug("Skipping proxy table row with non-hex port %r",
                      port_matches[0])
            continue

        # Build the item only once every field is known to be present.
        item = ProxyfetcherItem()
        item["ip"] = ip_matches[0].strip()
        item["port"] = str(port)
        item["country"] = country_texts[0]
        item["con_type"] = 'http'
        item["full_address"] = "{}:{}".format(item["ip"], item["port"])
        yield item.status_check(item)
gatherproxy_spider.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录