def __init__(self,
stopwords=NLTKStopwords(),
min_support=MIN_SUPPORT,
max_words=MAX_WORDS,
min_psupport=MIN_PSUPPORT,
min_compact_support=MIN_COMPACT_SUPPORT,
max_compact_distance=MAX_COMPACT_DISTANCE,
adj_key=StemKey(),
adj_win_size=ADJ_NEARBY_DISTANCE,
match=85,
compactness=True,
redundancy=True,
infrequent=True):
"""
Model to extract aspects using the algorithm by Hu et al. (2004)
stopwords : iterable of strings to use as stopwords
min_support : int, minimum support of an item set
              (positive: percentage, negative: absolute
              number of transactions)
max_words : int, maximum number of words in each aspect
min_psupport : int, minimum pure support of an aspect
min_compact_support : int, minimum number of compact sentences
                      containing an aspect
max_compact_distance : int, maximum distance between consecutive
                       words of an aspect
adj_key : callable, key function used when extracting adjectives
adj_win_size : int, maximum distance to look for adjectives
               near an aspect in a sentence
match : int, minimum similarity ratio (0-100] for matching
        (use <100 for fuzzy matching), default=85
compactness : boolean, True to run "compactness pruning"
redundancy : boolean, True to run "redundancy pruning"
infrequent : boolean, True to also extract infrequent aspects
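
Example (a minimal usage sketch; ``AspectExtractor`` is a
placeholder for the actual class name, which is not shown in
this snippet):

    model = AspectExtractor(min_support=-3,  # at least 3 transactions
                            max_words=2,     # aspects of up to 2 words
                            match=80)        # enable fuzzy matching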
"""
self.params = {"stopwords": stopwords,
"min_support": min_support,
"max_words": max_words,
"min_psupport": min_psupport,
"min_compact_support": min_compact_support,
"max_compact_distance": max_compact_distance,
"adj_key": adj_key,
"adj_win_size": adj_win_size,
"match": match,
"compactness": compactness,
"redundancy": redundancy,
"infrequent": infrequent}