def get_first_page(self, response):
    """Parse the first result page and request each linked domain page.

    The registrant is read from ``response.meta['registrant']``. For every
    row of the ``sf-grid`` table that contains a detail link, one
    ``scrapy.Request`` is yielded for ``https://www.benmi.com`` + that link,
    with ``self.get_domain_name`` as the callback. Nothing is yielded unless
    ``self.if_too_many_request(response.body, 'first_page')`` returns a value
    equal to ``False``.

    Args:
        response: Response of the first result page. Its ``meta`` must
            contain the key ``'registrant'``.

    Yields:
        scrapy.Request: One request per detail link. ``meta`` carries the
        cookie tuple returned by ``get_cookie()`` under ``'cookie'`` and a
        new ``RwhoisRegistrantItem`` (with ``'registrant'`` filled in) under
        ``'item'``.

    Raises:
        KeyError: If ``response.meta`` has no ``'registrant'`` entry.
    """
    request_state = self.if_too_many_request(response.body, 'first_page')
    registrant = response.meta['registrant']
    # Kept as ``== False`` on purpose: ``not request_state`` would also accept
    # None/0/'' and it is not visible here what the helper may return.
    if request_state == False:  # noqa: E712
        page = Selector(text=response.body)
        rows = page.xpath(u'//table[@class="sf-grid" and @id = "sf-grid"]/tr')
        for row in rows:
            hrefs = row.xpath('td[@class = "lf"]/a/img[@alt="..."]/../@href').extract()
            if not hrefs:
                # A row without a detail link (such as a header row, if the
                # table has one) previously raised IndexError on ``[0]`` and
                # aborted the whole page; skip it instead.
                continue
            # A fresh cookie is fetched for every request, as before.
            cookie = get_cookie()
            item = RwhoisRegistrantItem()
            item['registrant'] = registrant
            # NOTE(review): the BenmiUserInfo2 / SITEINFO cookie values are
            # hard-coded credentials; consider moving them to settings.
            yield scrapy.Request(
                "https://www.benmi.com" + hrefs[0],
                headers=self.head,
                meta={'cookie': cookie, 'item': item},
                cookies={"__cfduid": cookie[1], "cf_clearance": cookie[2],
                         "BenmiUserInfo2": "Benmi-UN=hahaha321",
                         "SITEINFO": "66b/UN0Nvf1MujwHhivXoluFewMFC48CdOZ9YpNXKEg=; "},
                callback=self.get_domain_name,
                dont_filter=True,
            )
registrant_spider.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录