def process_content(html, item_dict):
    """Extract the article body from ``html`` and record it in ``item_dict``.

    The first element whose ``class`` attribute equals ``article-content`` is
    used as the article body. The first ``<img>`` in the whole document whose
    ``src`` ends in ``jpg``/``png``/``gif`` becomes the cover: its URL is
    stored under ``item_dict['cover']`` and a new ``<img>`` pointing at it is
    inserted as the first child of the article body. Once a cover has been
    chosen, any later ``<img>`` carrying an inline base64 PNG gets its ``src``
    blanked. The serialized body is stored under ``item_dict['content']``.

    ``Xhtml`` and ``Element`` are module-level names defined outside this
    function (presumably ``lxml.html`` and ``lxml.etree.Element`` -- confirm
    against the file's imports).

    Args:
        html: HTML markup of the page.
        item_dict: Mutable mapping describing the item. It is updated in
            place with ``'cover'`` and ``'content'`` and must already hold
            ``'created'``, ``'title'``, ``'desc'`` and ``'link'``, which are
            printed for debugging.

    Returns:
        ``item_dict`` (the same object, mutated), or the empty string ``''``
        when the page has no ``article-content`` element. In that case
        ``item_dict`` is left untouched.

    Raises:
        KeyError: if ``item_dict`` lacks one of the keys that are printed.
    """
    root = Xhtml.fromstring(html)

    # Locate the article body; give up early when the page has none.
    matches = root.xpath('//*[@class="article-content"]')
    if not matches:
        return ''
    content = matches[0]

    item_dict['cover'] = None
    # The node list is materialized before the loop, so inserting the cover
    # element into ``content`` below does not disturb the iteration.
    for img in root.xpath('//img[@src]'):
        src = img.attrib['src'].strip()
        if not item_dict['cover'] and src.lower().endswith(('jpg', 'png', 'gif')):
            # Only protocol-relative URLs ("//host/path") need a scheme.
            # Prepending "http:" to an already absolute URL would yield
            # "http:http://...", so other forms are kept unchanged.
            item_dict['cover'] = 'http:' + src if src.startswith('//') else src

            # Put the cover image at the very top of the article body.
            cover_element = Element('img')
            cover_element.set('src', item_dict['cover'])
            content.insert(0, cover_element)
        elif src.startswith("data:image/png;base64,"):
            # Blank out inline base64 PNG payloads.
            img.set("src", "")

    item_dict['content'] = Xhtml.tostring(content, encoding='unicode')

    # Debug trace of the collected item. Single-argument print(...) produces
    # the same output under Python 2 and also parses under Python 3.
    print("++++\tGet jaq items\t++++")
    print(item_dict['cover'])
    print(item_dict['created'])
    print(item_dict['title'])
    print(item_dict['desc'])
    print(item_dict['link'])
    return item_dict
评论列表
文章目录