def __init__(self, language='en', window_width=2, collapse_fes=True, target_size=None):
    """Set up the extractor's configuration and its empty working state.

    :param language: language of the sentences that will be processed;
        it is handed to the POS tagger and selects the stop word list
    :param window_width: how many tokens to look at before and after
        each token when building its features
    :param collapse_fes: whether to collapse FEs to a single token
        or to keep them split
    :param target_size: when truthy, a ``TruncatedSVD(target_size)``
        reducer is created; otherwise ``self.reducer`` is ``None``
    """
    # Configuration taken straight from the arguments.
    self.language = language
    self.tagger = TTPosTagger(language)
    self.window_width = window_width
    self.collapse_fes = collapse_fes
    self.unk_feature = 'UNK'

    # Feature vectorization, plus a reducer that exists only when a
    # truthy target size was requested.
    self.vectorizer = DictVectorizer()
    self.target_size = target_size
    if target_size:
        self.reducer = TruncatedSVD(target_size)
    else:
        self.reducer = None

    # Containers that start out empty.
    self.vocabulary = set()
    self.label_index = {}
    self.lu_index = {}

    # Stop words are stored lower-cased.
    stop_words = StopWords().words(language)
    self.stopwords = {word.lower() for word in stop_words}

    self.start()
评论列表
文章目录