cluster.py 文件源码

python
阅读 34 收藏 0 点赞 0 评论 0

项目:rca-evaluation 作者: sieve-microservices 项目源码 文件源码
def do_kshape(name_prefix, df, cluster_size, initial_clustering=None):
    """Cluster the columns of ``df`` with ``kshape`` and export each cluster.

    Every column of ``df`` is passed through ``zscore`` and the resulting
    series are handed to ``kshape`` together with ``cluster_size`` and
    ``initial_clustering``. For each ``(centroid, member_indices)`` pair
    that ``kshape`` yields, a DataFrame holding the member series plus a
    ``"centroid"`` column is built, plotted, written to
    ``<name_prefix>_<n>.tsv.gz`` (tab separated, gzip compressed, ``n``
    starting at 1) and passed to ``graphs.write`` with the matching
    ``.png`` name.

    Args:
        name_prefix: Prefix (may include a directory) for the output files.
        df: DataFrame whose columns are the series to cluster; its index is
            reused as the index of every exported series.
        cluster_size: Forwarded unchanged to ``kshape``.
        initial_clustering: Forwarded unchanged to ``kshape``.

    Returns:
        A tuple ``(cluster_metrics, score, filenames)``:
        ``cluster_metrics`` is a ``defaultdict(list)`` mapping the integer
        label of each column to the column names carrying that label,
        ``score`` is the second value returned by ``silhouette_score``, and
        ``filenames`` lists the base names of the written ``.tsv.gz`` files.
    """
    metric_names = df.columns
    normalized = [zscore(df[metric]) for metric in metric_names]
    clusters = kshape(normalized, cluster_size, initial_clustering)
    # NOTE(review): silhouette_score here returns (labels, score), so it is
    # presumably a project helper rather than sklearn's -- confirm.
    labels, score = silhouette_score(np.array(normalized), clusters)

    # cluster_metrics[<cluster_nr>] -> [<metric_a>, <metric_b>, ...]
    cluster_metrics = defaultdict(list)
    for position, metric in enumerate(metric_names):
        cluster_metrics[int(labels[position])].append(metric)

    filenames = []
    for number, (centroid, members) in enumerate(clusters, start=1):
        # ``members`` holds positional indices into the column list.
        series_by_name = {
            metric_names[member]: pd.Series(normalized[member], index=df.index)
            for member in members
        }
        # Added last, so it overrides a member column that is itself
        # named "centroid".
        series_by_name["centroid"] = pd.Series(centroid, index=df.index)
        cluster_frame = pd.DataFrame(series_by_name)

        axes = cluster_frame.plot()
        # Anchor the legend's centre-left corner at the right edge of the
        # axes so it sits beside the plot instead of over the curves.
        axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        name = "%s_%d" % (name_prefix, number)
        filename = name + ".tsv.gz"
        print(filename)
        cluster_frame.to_csv(filename, sep="\t", compression='gzip')
        filenames.append(os.path.basename(filename))
        graphs.write(cluster_frame, name + ".png")
    return cluster_metrics, score, filenames
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号