import os

import numpy as np
import pandas as pd

# get_initial_clustering, do_kshape, metadata and metadata_lock are assumed
# to be defined elsewhere in the surrounding module.


def cluster_service(path, service, cluster_size, prev_metadata=None):
    # load the preprocessed measurements: one column per metric, indexed by time
    filename = os.path.join(path, service["preprocessed_filename"])
    df = pd.read_csv(filename, sep="\t", index_col='time', parse_dates=True)

    initial_idx = None
    if prev_metadata:
        initial_idx = get_initial_clustering(service["name"], prev_metadata, df.columns)
    # adjust cluster_size if an initial assignment has been found
    if initial_idx is not None:
        cluster_size = len(np.unique(initial_idx))

    prefix = "%s/%s-cluster-%d" % (path, service["name"], cluster_size)
    if os.path.exists(prefix + "_1.png"):
        print("skip " + prefix)
        return (None, None)

    # run k-Shape and collect the per-cluster metrics, the score and the plot files
    cluster_metrics, score, filenames = do_kshape(prefix, df, cluster_size, initial_idx)
    if cluster_size < 2:
        # no silhouette_score for cluster size 1
        return (None, None)
    print("silhouette_score: %f" % score)

    # protect the write access to the metadata file
    with metadata_lock:
        with metadata.update(path) as data:
            for srv in data["services"]:
                if srv["name"] == service["name"]:
                    if "clusters" not in srv:
                        srv["clusters"] = {}
                    d = dict(silhouette_score=score, filenames=filenames, metrics=cluster_metrics)
                    srv["clusters"][cluster_size] = d
    return (service["name"], cluster_size)
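
For context, here is a minimal driver sketch showing how cluster_service could be invoked; the helper name cluster_services_in_path, the services list and the cluster-size sweep are illustrative assumptions, not part of the original module.

def cluster_services_in_path(path, services, cluster_sizes=range(2, 8), prev_metadata=None):
    # Hypothetical driver: sweep candidate cluster sizes for every service and
    # collect the (service name, cluster size) pairs that were actually clustered.
    results = []
    for service in services:
        for cluster_size in cluster_sizes:
            name, size = cluster_service(path, service, cluster_size, prev_metadata)
            if name is not None:
                results.append((name, size))
    return results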