def parse(self, response):
    """Collect app items from a page and follow every link found on it.

    The host part of ``response.url`` is matched by substring against the
    keys of ``self.scrape_rules['xpath']`` and then against the keys of
    ``self.scrape_rules['custom_parser']``. In each table only the first
    matching key is used:

    * an ``xpath`` match passes the rule value to ``self.parse_xpath``;
    * a ``custom_parser`` match looks the rule value up as an attribute of
      the ``custom_parser`` module and calls it with ``response``.

    Both calls are expected to return an iterable of items, which are
    accumulated and yielded at the end.

    Yields:
        One ``Request`` (with ``callback=self.parse``) for every
        ``<a href>`` on the page, followed by the items collected by the
        matching rules.
    """
    response_domain = urlparse(response.url).netloc
    app_items = []
    # NOTE(review): this dict is named "cookie" but is passed to Request as
    # ``meta``, not ``cookies`` -- confirm which one was intended.
    cookie = {}

    # Per-site extraction is configuration driven: first matching key wins.
    xpath_rule = self.scrape_rules['xpath']
    for key in xpath_rule:
        if key in response_domain:
            app_items.extend(self.parse_xpath(response, xpath_rule[key]))
            break

    custom_parser_rule = self.scrape_rules['custom_parser']
    for key in custom_parser_rule:
        if key in response_domain:
            handler = getattr(custom_parser, custom_parser_rule[key])
            app_items.extend(handler(response))
            break

    sel = Selector(response)
    for href in sel.xpath('//a/@href').extract():
        url = urljoin(response.url, href)
        try:
            request = Request(url, meta=cookie, callback=self.parse)
        except ValueError:
            # Request raises ValueError for URLs it considers invalid.
            # Skip just that link: letting the exception escape would end
            # this generator and drop the remaining links as well as every
            # item collected above, because items are yielded last.
            continue
        yield request

    for item in app_items:
        yield item
#def parse_appchina(self, response):
# appItemList = []
# hxs = HtmlXPathSelector(response)
# for url in hxs.select(
# "//a[@id='pc-download' and @class='free']/@href"
# ).extract():
# url = urljoin(response.url, url)
# log.msg("Catch an application: %s" % url, level=log.INFO)
# appItem = AppItem()
# appItem['url'] = url
# appItemList.append(appItem)
# return appItemList
android_apps_spider.py 文件源码
python
阅读 27
收藏 0
点赞 0
评论 0
评论列表
文章目录