def extract_bigram_feats(document, bigrams):
    """
    Populate a dictionary of bigram features, reflecting the presence/absence
    in the document of each of the bigrams in `bigrams`. This extractor
    function only considers contiguous bigrams obtained by `nltk.bigrams`.

    :param document: a list (or any iterable) of words/tokens. It is traversed
        exactly once, so a one-shot iterator is handled correctly.
    :param bigrams: a list of bigrams whose presence/absence has to be
        checked in `document`.
    :return: a dictionary of bigram features {bigram : boolean}.

    >>> bigrams = [('global', 'warming'), ('police', 'prevented'), ('love', 'you')]
    >>> document = 'ice is melting due to global warming'.split()
    >>> sorted(extract_bigram_feats(document, bigrams).items()) # doctest: +NORMALIZE_WHITESPACE
    [('contains(global - warming)', True), ('contains(love - you)', False),
    ('contains(police - prevented)', False)]
    """
    # Build the document's contiguous bigrams once instead of once per
    # candidate: the result does not depend on the loop variable, and
    # re-reading `document` inside the loop would silently yield nothing
    # after the first pass if it were an iterator.
    # A list (not a set) keeps plain `==` membership semantics, so candidate
    # bigrams that are unhashable still simply evaluate to False.
    document_bigrams = list(nltk.bigrams(document))

    features = {}
    for bigr in bigrams:
        feature_name = 'contains({0} - {1})'.format(bigr[0], bigr[1])
        features[feature_name] = bigr in document_bigrams
    return features
#////////////////////////////////////////////////////////////
#{ Helper Functions
#////////////////////////////////////////////////////////////
util.py 文件源码
python
阅读 33
收藏 0
点赞 0
评论 0
评论列表
文章目录