def fcluster(df, Z, n_groups, n_clusters):
    """Label every row of ``df`` with the flat cluster of its group.

    The linkage ``Z`` is cut into at most ``n_clusters`` flat clusters with
    ``hac.fcluster(..., criterion='maxclust')``.  Observation ``g - 1`` of
    that result is taken as the cluster of group ``g``, and the id is written
    to all rows of ``df`` whose ``'group'`` index level equals ``g``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a MultiIndex containing the named levels ``'group'`` and
        ``'datetime'``.  Group labels are expected to be 1-based integers
        (e.g. days 1-365 or weeks 1-52).  NOTE(review): the level order
        ``('group', 'datetime')`` is assumed -- confirm against callers.
        The frame is modified in place.
    Z : ndarray
        Linkage matrix passed through to ``hac.fcluster`` (``hac`` is
        presumably ``scipy.cluster.hierarchy`` -- it is imported elsewhere
        in the file).
    n_groups : int
        Number of groups to label; groups ``1 .. n_groups`` receive an id.
    n_clusters : int
        Maximum number of flat clusters to form.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df``.  ``'cluster_id'`` has been appended to the
        index and moved to the first level, and ``'datetime'`` and ``'group'``
        have been swapped, giving ``('cluster_id', 'group', 'datetime')`` for
        the assumed input order.  Rows whose group lies outside
        ``1 .. n_groups`` keep NaN as their cluster id, so the level is
        float-valued.

    Raises
    ------
    IndexError
        If ``n_groups`` is larger than the number of observations in ``Z``.
    """
    # Cut the hierarchy into at most ``n_clusters`` flat clusters.  ``depth``
    # is kept from the original call for compatibility.
    T = hac.fcluster(Z, criterion='maxclust', depth=2, t=n_clusters)

    if n_groups > len(T):
        raise IndexError(
            'n_groups (%d) exceeds the number of clustered observations (%d)'
            % (n_groups, len(T))
        )

    # Group g maps to T[g - 1]: the labels are 1-based, T is 0-based.
    # Doing the lookup in one vectorized step replaces the removed ``df.ix``
    # indexer and the removed ``np.NAN`` alias.
    groups = np.asarray(df.index.get_level_values('group'))
    cluster_ids = np.full(len(df), np.nan)
    labelled = (groups >= 1) & (groups <= n_groups)
    cluster_ids[labelled] = T[groups[labelled].astype(int) - 1]
    df['cluster_id'] = cluster_ids

    # Move the cluster id into the index (in place, so the caller's frame is
    # updated as before) ...
    df.set_index(['cluster_id'], append=True, inplace=True)
    # ... make it the first level for easier looping over cluster ids ...
    df.index = df.index.swaplevel(0, 'cluster_id')
    # ... and swap the remaining two so datetime ends up as the last level.
    df.index = df.index.swaplevel('datetime', 'group')
    return df
评论列表
文章目录