newsevent.py 文件源码

python
阅读 22 收藏 0 点赞 0 评论 0

项目:stock 作者: datablood 项目源码 文件源码
def guba_sina(show_content=False):
    """
    Fetch the posts listed on the Sina "guba" page and return them as a table.

    The page at ``nv.GUBA_SINA_URL`` is downloaded and parsed. The first
    ``div.tit_04`` headline and every ``ul.list_05 > li`` entry each
    contribute one row; the linked post is then loaded through
    ``_guba_content`` to fill in the remaining columns.

    Parameter
    --------
        show_content: when exactly ``True`` the ``content`` column is kept;
            for any other value (default ``False``) it is dropped.

    Return
    --------
    DataFrame
        Columns are taken from ``nv.GUBA_SINA_COLS``; according to the
        original documentation these are:
        title, post title
        content, post body (only when show_content=True)
        ptime, presumably the publication time -- confirm in _guba_content
        rcounts, presumably the read count (cast to float here)

    Returns ``None`` after printing the error message if anything fails
    (network error, unexpected page layout, ...).
    """

    from pandas.io.common import urlopen
    try:
        with urlopen(nv.GUBA_SINA_URL % (ct.P_TYPE['http'],
                                         ct.DOMAINS['sina'])) as resp:
            lines = resp.read()
        html = lxml.html.document_fromstring(lines)
        res = html.xpath('//ul[@class="list_05"]/li')
        heads = html.xpath('//div[@class="tit_04"]')

        # Pair each node with the relative path of the anchor that carries
        # its title/link: the headline uses its first <a>, list entries use
        # their second <a>. Headline first, to keep the original row order.
        sources = [(head, 'a') for head in heads[:1]]
        sources.extend((row, 'a[2]') for row in res)

        data = []
        for node, anchor in sources:
            titles = node.xpath(anchor + '/text()')
            urls = node.xpath(anchor + '/@href')
            if not titles or not urls:
                # A malformed entry (missing anchor or empty link text) used
                # to raise IndexError and discard every row; skip it instead.
                continue
            ds = [titles[0]]
            ds.extend(_guba_content(urls[0]))
            data.append(ds)

        df = pd.DataFrame(data, columns=nv.GUBA_SINA_COLS)
        df['rcounts'] = df['rcounts'].astype(float)
        return df if show_content is True else df.drop('content', axis=1)
    except Exception as er:
        # Best-effort scraper: report the problem and signal failure with
        # None rather than propagating, as callers of this API expect.
        print(str(er))
        return None
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号