keyphrase.py 文件源码-python代码片段

keyphrase.py 文件源码

python

阅读 34 收藏 0 点赞 0 评论 0

项目：minke 作者: DistrictDataLabs 项目源码文件源码

def extract_candidate_words(sents, tags=GOODTAGS, tagged=False, **kwargs):
    """
    Generate normalized candidate words, keeping only those words whose
    part-of-speech tag is contained in ``tags``.

    Each item of ``sents`` is treated as one sentence. By default a sentence
    is run through ``nltk.wordpunct_tokenize`` and ``nltk.pos_tag`` first;
    pass ``tagged=True`` when every sentence is already an iterable of
    ``(token, tag)`` pairs. Any extra keyword arguments are forwarded to the
    ``Normalizer`` constructor.
    """
    word_normalizer = Normalizer(**kwargs)

    for sentence in sents:
        # Obtain (token, tag) pairs, tagging the raw sentence when required
        if tagged:
            tagged_pairs = sentence
        else:
            tagged_pairs = nltk.pos_tag(nltk.wordpunct_tokenize(sentence))

        for word, pos in tagged_pairs:
            # Skip anything whose tag is not one of the accepted tags
            if pos not in tags:
                continue

            # Normalize one word at a time and emit whatever comes back
            for normalized in word_normalizer.normalize([word]):
                yield normalized


##########################################################################
## Key phrase by text scoring mechanisms
##########################################################################