def sk_kmeans(core):  # , kval=3
    """Score k-means clusterings of a Solr core for a range of cluster counts.

    Queries every document (``*:*``) from the core at
    ``http://localhost:8983/solr/<core>``, wraps each one in a ``Vector``,
    and builds a DataFrame from the vectors' ``features`` (missing cells are
    filled with 0). KMeans is then fitted for each candidate k starting at 2
    and the silhouette score of the resulting labelling is recorded.

    Args:
        core: Name of the Solr core; appended to the local Solr base URL.

    Returns:
        ``str()`` of a dict mapping each evaluated k to its silhouette score.
        k runs from 2 up to 9, but never above ``n_samples - 1``, so the
        dict is empty (``"{}"``) when the core holds fewer than 3 documents.
    """
    solr_url = "http://localhost:8983/solr/" + core
    solr_instance = Solr(solr_url)

    docs = solr_instance.query_iterator(query="*:*", start=0)
    # NOTE(review): assumes Vector.features is a dict-like row of
    # feature -> value; confirm against the Vector class.
    feature_rows = [Vector(doc['id'], doc).features for doc in docs]

    df = pd.DataFrame(feature_rows)
    df = df.fillna(0)

    # silhouette_score needs 2 <= n_labels <= n_samples - 1, and KMeans
    # rejects n_clusters > n_samples, so cap the exclusive upper bound at
    # the sample count instead of always trying k up to 9.
    upper_k = min(10, len(df))

    silhouettes = {}
    for k in range(2, upper_k):
        # n_jobs is intentionally not passed: KMeans deprecated it in
        # scikit-learn 0.23 and removed it in 1.0, where passing it raises
        # TypeError. Parallelism is handled internally via OpenMP threads.
        kmeans = KMeans(
            n_clusters=k,
            init='k-means++',
            max_iter=300,  # iteration cap for a single k-means run
            n_init=10,  # restarts; the best run by inertia is kept
        )
        labels = kmeans.fit_predict(df)
        silhouettes[k] = silhouette_score(df, labels)
    return str(silhouettes)
# Comment list
# Table of contents