snapshot.py 文件源码-python代码片段

snapshot.py 文件源码

python

阅读 46 收藏 0 点赞 0 评论 0

def fcluster(df, Z, n_groups, n_clusters):
    """Label every row of ``df`` with the flat cluster of its group.

    The hierarchy ``Z`` is cut into at most ``n_clusters`` flat clusters, and
    the label of the ``g``-th observation (``T[g - 1]``) is written to every
    row whose first index level equals ``g``.  ``cluster_id`` is then
    appended to the index and moved to the front.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to label.  Its index must contain levels named ``'group'`` and
        ``'datetime'`` (the final level swap looks them up by name); group
        labels are read from the first index level and are expected to run
        from 1 to ``n_groups`` (e.g. days 1-365 or weeks 1-52).
        The frame is modified in place.
    Z : array-like
        Passed unchanged to ``hac.fcluster``.  Assumes ``hac`` is
        ``scipy.cluster.hierarchy`` and ``Z`` a linkage matrix with one
        observation per group -- confirm against the caller.
    n_groups : int
        Number of groups to label; groups ``1 .. n_groups`` receive
        ``T[0] .. T[n_groups - 1]``.
    n_clusters : int
        Upper bound on the number of flat clusters (``t`` of the
        ``'maxclust'`` criterion).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``cluster_id`` as first index level.
        For a two-level ``('group', 'datetime')`` input index the result is
        ordered ``('cluster_id', 'group', 'datetime')``.  Rows whose group
        lies outside ``1 .. n_groups`` keep ``NaN`` as cluster id.

    Raises
    ------
    IndexError
        If ``n_groups`` exceeds the number of labels returned for ``Z``.
    """
    # 'maxclust' yields at most `n_clusters` flat clusters.  The former
    # `depth=2` argument is omitted: it equals the SciPy default and is only
    # used by the 'inconsistent' criterion.
    T = hac.fcluster(Z, t=n_clusters, criterion='maxclust')

    # Start from NaN so unlabeled rows stay recognisable; this also keeps
    # the column (and hence the resulting index level) floating point.
    df['cluster_id'] = np.nan

    # Group labels live on the first index level.  A boolean mask replaces
    # the removed `.ix` indexer and cannot be mistaken for a row/column
    # tuple key on the MultiIndex.
    group_ids = df.index.get_level_values(0)
    for g in range(1, n_groups + 1):
        # T is 0-based while group labels start at 1.
        df.loc[group_ids == g, 'cluster_id'] = T[g - 1]

    # Add the cluster id to the index ...
    df.set_index(['cluster_id'], append=True, inplace=True)
    # ... make it the first level for easier looping over cluster ids ...
    df.index = df.index.swaplevel(0, 'cluster_id')
    # ... and exchange 'datetime' and 'group' so datetime ends up last.
    df.index = df.index.swaplevel('datetime', 'group')

    return df