def parse_xpath(self, response, xpath):
    """Build one AppItem per string extracted from ``response`` by ``xpath``.

    Each extracted string is joined onto ``response.url`` to form an
    absolute URL, reported through the Scrapy log at INFO level, and
    stored under the ``'url'`` key of a new ``AppItem``.

    Args:
        response: The response to query; its ``url`` attribute is the
            base for resolving the extracted strings.
        xpath: XPath expression evaluated against the response
            (presumably selecting link/href values -- confirm with the
            caller's rule table).

    Returns:
        A list of ``AppItem`` objects in match order; empty when the
        expression matches nothing.
    """
    collected = []
    # Matches are processed in document order: resolve, log, then wrap.
    for raw_link in Selector(response).xpath(xpath).extract():
        absolute_link = urljoin(response.url, raw_link)
        message = "Catch an application: %s" % absolute_link
        log.msg(message, level=log.INFO)
        item = AppItem()
        item['url'] = absolute_link
        collected.append(item)
    return collected
#def parse_anzhi(self, response, xpath):
# appItemList = []
# hxs = HtmlXPathSelector(response)
# for script in hxs.select(xpath).extract():
# id = re.search(r"\d+", script).group()
# url = "http://www.anzhi.com/dl_app.php?s=%s&n=5" % (id,)
# appItem = AppItem()
# appItem['url'] = url
# appItemList.append(appItem)
# return appItemList
android_apps_spider.py 文件源码
python
阅读 19
收藏 0
点赞 0
评论 0
评论列表
文章目录