def __init__(self,
stopwords=NLTKStopwords(),
min_support=MIN_SUPPORT,
max_words=MAX_WORDS,
min_psupport=MIN_PSUPPORT,
min_compact_support=MIN_COMPACT_SUPPORT,
max_compact_distance=MAX_COMPACT_DISTANCE,
adj_key=StemKey(),
adj_win_size=ADJ_NEARBY_DISTANCE,
match=85,
compactness=True,
redundancy=True,
infrequent=True):
"""
Model to extract aspects using the algorithm by Hu et al. (2004)
stopwords : iterable of strings to use as stopwords
min_support : int, minimum support of an item set
              (positive: percentage, negative: absolute
              number of transactions)
max_words : int, maximum number of words in each aspect
min_psupport : int, minimum pure support of an aspect
min_compact_support : int, minimum number of compact sentences
                      containing an aspect
max_compact_distance : int, maximum distance between consecutive
                       words of an aspect
adj_key : callable, key function used when extracting adjectives
adj_win_size : int, maximum distance to look for adjectives
               near an aspect in a sentence
match : int, minimum similarity ratio (0-100] for matching
        (use <100 for fuzzy matching), default=85
compactness : boolean, True to run "compactness pruning"
redundancy : boolean, True to run "redundancy pruning"
infrequent : boolean, True to also extract infrequent aspects
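
Example (a minimal usage sketch; ``AspectExtractor`` is a
placeholder for the actual class name, which is not shown in
this snippet):

    model = AspectExtractor(min_support=-3,  # at least 3 transactions
                            max_words=2,     # aspects of up to 2 words
                            match=80)        # enable fuzzy matching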
"""
self.params = {"stopwords": stopwords,
"min_support": min_support,
"max_words": max_words,
"min_psupport": min_psupport,
"min_compact_support": min_compact_support,
"max_compact_distance": max_compact_distance,
"adj_key": adj_key,
"adj_win_size": adj_win_size,
"match": match,
"compactness": compactness,
"redundancy": redundancy,
"infrequent": infrequent}