def plot_cluster_hist(X_featurized, labels, num_clusters):
    '''
    Plot a histogram of the number of users assigned to each cluster.

    Parameters
    ----------
    X_featurized : array-like
        Featurized data. Kept for interface compatibility only: it is
        neither read nor modified (no 'label' column is added to it).
    labels : array-like
        Predicted zero-based cluster index of each user.
    num_clusters : int
        Number of clusters.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    '''
    fig = plt.figure()
    ax_ = fig.add_subplot(1, 1, 1)

    # sklearn numbers clusters from zero; shift by one so the axis reads
    # 1..num_clusters.  Work on a separate array instead of writing a
    # 'label' column into the caller's data.
    cluster_ids = np.asarray(labels) + 1

    # Unit-width bins centred on the integers 0..num_clusters.
    bin_edges = np.arange(num_clusters + 2) - 0.5

    # Plot the cluster sizes.
    counts, _, _ = ax_.hist(cluster_ids, bins=bin_edges)

    # Format figure.
    ax_.set_title("Number of users in each cluster.", fontsize=14,
                  fontweight='bold')
    ax_.set_xticks(range(1, num_clusters + 1))
    ax_.set_xlim([0, num_clusters + 1])
    # Keep the historical 0-1200 range, but grow it (with 5 % headroom)
    # when a cluster is larger so that no bar is clipped.
    ax_.set_ylim([0, max(1200, 1.05 * counts.max())])
    ax_.set_xlabel('Cluster')
    ax_.set_ylabel("Number of users")
    plt.show()
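# Source: plots.py (Python). The text that followed here was page navigation
# scraped from the hosting web site (read/favorite/like/comment counters,
# comment list, table of contents) and is not part of the code.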