def refine_item(self, response, item):
    """Populate ``item['birth']`` and ``item['death']`` from the page.

    The markup extracted from the first ``<em>`` in the first paragraph
    of ``div#maincontent`` is cut at the first ``<br>``, stripped of any
    remaining tags, and split on the ``d.`` marker.  Only when that
    yields exactly two parts (one ``d.`` marker) are the two fields set;
    otherwise ``item`` is left as it was.

    A part that is empty or equal to ``'?'`` is stored as ``None``.

    :param response: response object handed to ``text.clean_extract``
        (presumably returns the matched markup as a string -- confirm
        against the helper).
    :param item: mapping-like item that receives the ``birth`` and
        ``death`` keys.
    :returns: whatever the parent class's ``refine_item`` returns for
        the same ``response`` and ``item``.
    """
    raw = text.clean_extract(
        response, './/div[@id="maincontent"]/p[1]/em'
    ).split('<br>')[0]
    # Remove leftover inline tags so only the plain text is parsed.
    plain = re.sub(r'<[^>]+>', '', raw)
    parts = plain.split('d.')
    if len(parts) == 2:
        birth, death = (part.strip() for part in parts)
        # Strip the 'b.' marker only when it is really present; slicing
        # unconditionally would eat the first two characters of the date
        # whenever the marker is missing or preceded by whitespace.
        if birth.startswith('b.'):
            birth = birth[len('b.'):].strip()
        # '?' and an empty string both mean the date is unknown.
        item['birth'] = birth if birth not in ('', '?') else None
        item['death'] = death if death not in ('', '?') else None
    return super(MunksrollSpider, self).refine_item(response, item)
评论列表
文章目录