wikify.py 文件源码

python
阅读 26 收藏 0 点赞 0 评论 0

项目:cdata 作者: cnschema 项目源码 文件源码
def wikidata_search(query, lang="zh", output_lang="en", searchtype="item", max_result=1):
    """Search Wikidata entities matching ``query`` via ``wbsearchentities``.

    API reference:
    https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities

    Args:
        query: search text; normalised with ``any2unicode`` before use.
        lang: sent as the API ``language`` parameter.
        output_lang: sent as the API ``uselang`` parameter.
        searchtype: sent as the API ``type`` parameter (e.g. "item").
        max_result: maximum number of hits kept in the returned list.

    Returns:
        A dict ``{"query": <normalised query>, "itemList": [...]}``.  Each
        list entry is ``json_dict_copy(hit, property_list)`` for one raw hit
        taken from the ``search`` array of the API response.

    Raw API response (abridged example)::

        {
            "searchinfo": {"search": "birthday"},
            "search": [
                {
                    "repository": "",
                    "id": "P3150",
                    "concepturi": "http://www.wikidata.org/entity/P3150",
                    "url": "//www.wikidata.org/wiki/Property:P3150",
                    "title": "Property:P3150",
                    "pageid": 28754653,
                    "datatype": "wikibase-item",
                    "label": "birthday",
                    "description": "item for day and month on which ...",
                    "match": {
                        "type": "label",
                        "language": "en",
                        "text": "birthday"
                    }
                }
            ]
        }
    """
    # Per the wbsearchentities documentation the server returns 7 hits when
    # no ``limit`` is given and accepts at most 50 for regular clients.
    api_default_limit = 7
    api_max_limit = 50

    query = any2unicode(query)
    params = {
        "action": "wbsearchentities",
        "search": query,
        "format": "json",
        "language": lang,
        "uselang": output_lang,
        "type": searchtype,
    }
    # Without an explicit ``limit`` a caller asking for more than the default
    # page size would silently get a truncated list.  Requests for the default
    # size or fewer keep the original query unchanged.
    if isinstance(max_result, int) and max_result > api_default_limit:
        params["limit"] = min(max_result, api_max_limit)

    # Let requests build the query string: it UTF-8 encodes text values
    # itself, avoiding ``urllib.urlencode`` (which exists only on Python 2).
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        "https://www.wikidata.org/w/api.php",
        params=params,
        timeout=30,
    )
    results = response.json().get("search", [])

    # Mapping handed to json_dict_copy; presumably "name" is the output key
    # and "alternateName" lists source keys to read from the raw hit --
    # NOTE(review): confirm against json_dict_copy.
    property_list = [
        {"name": "name", "alternateName": ["label"]},
        {"name": "url", "alternateName": ["concepturi"]},
        {"name": "identifier", "alternateName": ["id"]},
        {"name": "description"},
    ]
    items = [
        json_dict_copy(result, property_list)
        for result in results[0:max_result]
    ]
    return {"query": query, "itemList": items}
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号