import urllib.parse

import requests
import Levenshtein


def getCandidatesForLemma(lemma, min_size, max_size):
    hits = []
    for match in ["phrase", "conjunct"]:
        # Query the LOTUS index of the LOD Laundromat for DBpedia resources
        # whose label matches the lemma.
        url = ("http://lotus.lodlaundromat.org/retrieve?size=" + str(max_size)
               + "&match=" + match + "&rank=psf&noblank=true&"
               + urllib.parse.urlencode({"string": lemma,
                                         "predicate": "label",
                                         "subject": "\"http://dbpedia.org/resource\""}))
        r = requests.get(url=url)
        content = r.json()
        these_hits = content["hits"]
        hits = hits + these_hits
        # Only fall back to "conjunct" matching when the phrase query returned
        # too few hits and the lemma consists of more than one word.
        if content["numhits"] >= min_size or len(lemma.split(' ')) == 1:
            break

    subjects = {}
    for hit in hits:
        # String similarity between the matched label and the lemma.
        lev_sim = Levenshtein.ratio(hit["string"].lower(), lemma.lower())
        # Skip disambiguation pages and category resources.
        if "disambiguation" not in hit["subject"].lower() and "Category" not in hit["subject"]:
            if hit["subject"] not in subjects:
                subjects[hit["subject"]] = {"ss": lev_sim, "count": 1}
            else:
                # Keep the best similarity seen so far and count how many labels matched.
                subjects[hit["subject"]]["ss"] = max(subjects[hit["subject"]]["ss"], lev_sim)
                subjects[hit["subject"]]["count"] += 1
    return subjects
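
# --- Usage sketch (illustrative, not part of the original function) ---
# Assumes the LOTUS endpoint above is reachable; the lemma and the
# min_size/max_size thresholds are example values chosen for demonstration.
if __name__ == "__main__":
    candidates = getCandidatesForLemma("Barack Obama", min_size=10, max_size=50)
    # Rank candidate subjects by best label similarity, then by match count.
    ranked = sorted(candidates.items(),
                    key=lambda item: (item[1]["ss"], item[1]["count"]),
                    reverse=True)
    for subject, scores in ranked[:5]:
        print(subject, scores["ss"], scores["count"])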