def parse_article(self, response):
    """Parse one page of an author's article list and follow pagination.

    The response body is expected to be a JSON document with a ``data``
    list (one entry per article) and a ``paging`` object carrying an
    ``is_end`` flag.

    Args:
        response: The downloaded page. ``response.meta`` must contain
            ``author_id`` and, when another page follows, the current
            ``offset``.

    Yields:
        An ``ArticleItem`` for every entry in ``data``, followed by a
        ``Request`` for the next page when ``paging.is_end`` is false.
    """
    # Function-scope import: the file's import block is not visible here.
    import json

    # Parse the body as JSON instead of eval(): eval executes arbitrary
    # code from the remote server, fails on ``null``, and the former
    # true/false text replacement corrupted article titles and content.
    payload = json.loads(response.body.decode("utf8"))

    for one in payload['data']:
        item = ArticleItem()
        item['author_id'] = response.meta['author_id']
        item['title'] = one['title']
        item['article_id'] = one['id']
        item['content'] = one['content']
        # NOTE(review): the key 'cretated_time' looks like a typo for
        # 'created_time', but it presumably matches the field declared on
        # ArticleItem, so it is kept as-is -- confirm before renaming.
        item['cretated_time'] = one['created']
        item['updated_time'] = one['updated']
        item['voteup_count'] = one['voteup_count']
        item['comment_count'] = one['comment_count']
        yield item

    if not payload['paging']['is_end']:
        # Pages are requested in steps of 20 entries.
        offset = response.meta['offset'] + 20
        # Keep everything up to and including "offset=" and append the
        # new offset value.
        next_page = re.findall(r'(.*offset=)\d+', response.url)[0]
        yield Request(
            next_page + str(offset),
            callback=self.parse_article,
            meta={'author_id': response.meta['author_id'], 'offset': offset},
        )
# Comment list (评论列表)
# Article table of contents (文章目录)