def parsePatentDetail(self, response):
    """Copy patent-detail fields into the crawl item, then request related info.

    The response body is parsed as JSON. From its ``abstractInfoDTO`` entry
    this callback stores the abstract (markup stripped, newlines removed) and
    the invention name on the ``sipocrawler`` item, and passes every entry of
    ``abstractItemList`` to ``ItemCollection.resolveData``. It then yields a
    POST ``FormRequest`` to the related-info endpoint configured in
    ``url_config.relatedInfo``.

    Args:
        response: Response whose ``meta`` carries ``sipo``, ``sipocrawler``
            and ``lawinfo``. ``lawinfo`` is read with ``.get('nrdAn')`` /
            ``.get('nrdPn')``, so it is presumably a dict -- confirm against
            the callback that schedules this request.

    Yields:
        A ``FormRequest`` handled by ``self.parseRelatedInfo`` that forwards
        ``sipo`` and ``sipocrawler`` in its ``meta``.
    """
    sipo = response.meta['sipo']
    sipocrawler = response.meta['sipocrawler']
    # NOTE(review): newer Scrapy releases expose the decoded body as
    # ``response.text``; confirm the pinned Scrapy version before switching.
    detail = json.loads(response.body_as_unicode())

    # Every field read below lives under this entry, so look it up once.
    abstract_info = detail.get('abstractInfoDTO')

    # The first ``abIndexList`` value is run through BeautifulSoup so that
    # only its text content is kept.
    abstract_markup = abstract_info.get('abIndexList')[0].get('value')
    sipocrawler['abstract'] = (
        BeautifulSoup(abstract_markup, 'lxml').text.replace('\n', '').strip()
    )
    sipocrawler['invention_name'] = abstract_info.get('tioIndex').get('value')

    for abitem in abstract_info.get('abstractItemList'):
        ItemCollection.resolveData(
            sipocrawler, abitem.get('indexCnName'), abitem.get('value')
        )

    lawinfo = response.meta.get('lawinfo')
    # Work on a copy: the template dict belongs to the shared ``url_config``
    # module, and writing per-patent values into it would leak them into
    # every other request built from the same template.
    formdata = dict(url_config.relatedInfo.get('formdata'))
    formdata['literaInfo.nrdAn'] = lawinfo.get('nrdAn')
    formdata['literaInfo.nrdPn'] = lawinfo.get('nrdPn')

    yield FormRequest(
        url=url_config.relatedInfo.get('url'),
        method='POST',
        # Bypass Scrapy's duplicate-request filter so this request is
        # always scheduled.
        dont_filter=True,
        formdata=formdata,
        callback=self.parseRelatedInfo,
        meta={'sipo': sipo, 'sipocrawler': sipocrawler},
    )
# ??????
评论列表
文章目录