zhihu.py 文件源码-python代码片段

zhihu.py 文件源码

python

阅读 35 收藏 0 点赞 0 评论 0

def _first_text(node, query):
    """Return the stripped first result of xpath *query* on *node*, or ''."""
    matches = node.xpath(query)
    return matches[0].strip() if matches else ''


def process_content(jsonBody, item_dict):
    """Fill *item_dict* with the author and HTML content found in *jsonBody*.

    ``jsonBody`` is decoded as JSON and its ``'body'`` value is parsed with
    ``Xhtml.fromstring`` (NOTE(review): ``Xhtml`` / ``Element`` are imported
    elsewhere in this file -- presumably lxml; confirm).

    The function then:
      * reads the first text node of ``span.author`` and ``span.bio`` (each
        defaults to ``''`` when absent) and stores their plain concatenation,
        with no separator, in ``item_dict['author']``;
      * inserts an ``<img>`` whose ``src`` is ``item_dict['cover']`` as the
        first child of the parsed tree;
      * serializes the tree into ``item_dict['content']``;
      * prints a short summary of the item to stdout.

    Args:
        jsonBody: JSON text containing a ``'body'`` key with an HTML fragment.
        item_dict: dict that must already hold ``'cover'``, ``'created'``,
            ``'title'`` and ``'link'``; it is mutated in place.

    Returns:
        The same ``item_dict`` object, with ``'author'`` and ``'content'`` set.

    Raises:
        KeyError: if ``'body'`` is missing from the JSON or a required key is
            missing from ``item_dict``.
    """
    entry = json.loads(jsonBody)
    content = Xhtml.fromstring(entry['body'])

    author = _first_text(content, '//span[@class="author"]/text()')
    bio = _first_text(content, '//span[@class="bio"]/text()')
    item_dict['author'] = author + bio

    # Put the cover image at the very top of the article body.
    cover_element = Element('img')
    cover_element.set('src', item_dict['cover'])
    content.insert(0, cover_element)

    item_dict['content'] = Xhtml.tostring(content, encoding='unicode')

    # Single-argument print(...) behaves identically under Python 2's print
    # statement and Python 3's print function.
    print("++++\tGet zhihu items\t++++")
    for key in ('cover', 'created', 'title', 'author', 'link'):
        print(item_dict[key])
    return item_dict