def cluster():
    """Cluster the training destinations with MeanShift and save the centers.

    Reads the CSV at ``Train_CSV_Path``, evaluates every entry of its
    ``DESTINATION`` column into a list (presumably a coordinate pair --
    TODO confirm against the data), and stacks the results into one array.
    A bandwidth is estimated from that array and used to fit a ``MeanShift``
    model. The resulting cluster centers are written to the ``cluster``
    dataset of ``cluster.h5`` (opened in ``'w'`` mode) and returned.

    Returns:
        The fitted model's ``cluster_centers_``.
    """
    df_train = pd.read_csv(Train_CSV_Path, header=0)

    # SECURITY NOTE(review): eval() executes whatever expression is stored in
    # the CSV cell. That is only acceptable while the training file is fully
    # trusted; if the cells are plain literals, ast.literal_eval would be the
    # safe replacement -- confirm the column format before switching.
    destination = np.array(
        [list(eval(raw)) for raw in df_train['DESTINATION']]
    )

    # The bandwidth is estimated on a 1000-point subsample rather than on
    # the whole array.
    bw = estimate_bandwidth(
        destination,
        quantile=0.1,
        n_samples=1000,
    )

    ms = MeanShift(
        bandwidth=bw,
        bin_seeding=True,
        min_bin_freq=5,
    )
    ms.fit(destination)
    cluster_centers = ms.cluster_centers_

    # Persist the centers so they can be reloaded without re-fitting.
    with h5py.File('cluster.h5', 'w') as f:
        f.create_dataset('cluster', data=cluster_centers)

    return cluster_centers
评论列表
文章目录