def extract_unigram_feats(document, unigrams, handle_negation=False):
    """
    Populate a dictionary of unigram features, reflecting the presence/absence in
    the document of each of the tokens in `unigrams`.

    :param document: an iterable of words/tokens.
    :param unigrams: an iterable of words/tokens whose presence/absence has to
        be checked in `document`.
    :param handle_negation: if `handle_negation == True` apply `mark_negation`
        method to `document` before checking for unigram presence/absence.
    :return: a dictionary of unigram features {'contains(<unigram>)': boolean}.

    >>> words = ['ice', 'police', 'riot']
    >>> document = 'ice is melting due to global warming'.split()
    >>> sorted(extract_unigram_feats(document, words).items())
    [('contains(ice)', True), ('contains(police)', False), ('contains(riot)', False)]
    """
    if handle_negation:
        document = mark_negation(document)

    # Build the lookup set once: rebuilding it per unigram costs a full pass
    # over the document each time, and would exhaust a one-shot iterator after
    # the first unigram.
    document_tokens = set(document)

    return {
        'contains({0})'.format(word): word in document_tokens
        for word in unigrams
    }
评论列表
文章目录