def fit_nmf(self, df):
    '''
    Fit an NMF model and derive per-row topic weights, shares and labels.

    Calls self.optimize_nmf(df) first, then fits NMF on self.tfidf_matrix
    with self.optimum_topics components (both are presumably prepared by
    optimize_nmf -- confirm against that method).

    INPUT:
        df: pandas Dataframe containing 'lemmatized_text' column for TF-IDF

    OUTPUT:
        None. The following attributes are set on the instance:
        nmf: the fitted NMF model
        W_matrix: result of nmf.transform on the TF-IDF matrix
        W_pct: W_matrix with every row divided by its row sum
               (rows that sum to zero are left as all zeros)
        labels: boolean array, True where W_pct >= 0.20
        The model's reconstruction error is printed.
    '''
    self.optimize_nmf(df)

    model = NMF(
        n_components=self.optimum_topics,
        alpha=self.nmf_alpha,
        l1_ratio=self.nmf_l1_ratio,
        random_state=self.random_state,
    )
    self.nmf = model.fit(self.tfidf_matrix)
    self.W_matrix = self.nmf.transform(self.tfidf_matrix)

    row_totals = self.W_matrix.sum(axis=1)
    # A row with no weight on any component sums to zero; dividing by that
    # would fill the row with NaN. Dividing by 1 instead keeps it all zeros,
    # and such rows still receive no label (0 >= 0.20 is False).
    row_totals[row_totals == 0] = 1.0
    self.W_pct = self.W_matrix / row_totals[:, None]

    # A row is labelled with every component holding at least 20% of its weight.
    self.labels = self.W_pct >= 0.20

    # Single-argument call form prints identically on Python 2 and Python 3.
    print("Reconstruction Error: {}".format(self.nmf.reconstruction_err_))
评论列表
文章目录