def kmeans(class_num):
    """
    Cluster the sentences returned by ``loadFile()`` with k-means on tf-idf features.

    :param class_num: number of clusters to build (passed to ``KMeans`` as
        ``n_clusters``)
    :return: a list of ``class_num`` lists; the list at index ``k`` holds every
        sentence whose k-means label is ``k``, e.g.
        ``[[sentence1, sentence2], [sentence3, sentence4]]``
    """
    # loadFile() yields two parallel sequences: the text that is vectorized and
    # the sentences that are returned in the result.
    # NOTE(review): presumably each entry of sentences_words is one sentence as
    # a whitespace-separated string of words -- confirm against loadFile().
    sentences_words, sentences = loadFile()

    vectorizer = CountVectorizer()    # term counts per sentence
    transformer = TfidfTransformer()  # re-weights those counts to tf-idf

    # Inner fit_transform builds the count matrix; the outer one converts it
    # to tf-idf weights.
    tfidf = transformer.fit_transform(vectorizer.fit_transform(sentences_words))

    # Dense matrix with one row per sentence and one column per vocabulary
    # term; weight[i][j] is the tf-idf weight of term j in sentence i.
    weight = tfidf.toarray()

    clf = KMeans(n_clusters=class_num)
    clf.fit(weight)

    # One empty bucket per cluster, so clusters that receive no sentence still
    # appear in the result as empty lists.
    class_list = [[] for _ in range(class_num)]

    # Debug output of the raw labels. The parenthesised single-argument form
    # prints identically on Python 2 and Python 3.
    print(clf.labels_)

    # clf.labels_[i] is the cluster index assigned to the i-th sentence.
    for i, class_label in enumerate(clf.labels_):
        class_list[class_label].append(sentences[i])

    return class_list
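# Comment list        (web-page navigation text captured with the snippet; not code)
# Article contents    (web-page navigation text captured with the snippet; not code)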