def visualize_tweets(W, topic_number, color):
    '''
    Scatter-plot the tweets assigned to one topic in a 2-D PCA projection.

    Every row of W is assigned to the topic (column) holding its largest
    value. The rows assigned to topic_number are projected onto their first
    two principal components and drawn as a scatter plot.

    INPUT
         - W - matrix of observations; rows are treated as tweets and
           columns as topics (presumably the document-topic factor of a
           topic model - confirm against the caller)
         - topic_number - zero-based index of the topic to be plotted
           (the plot title displays it one-based)
         - color - this is the color to be used in creating the scatterplot
    OUTPUT
         - a scatter plot of the selected topic's tweets in a flattened
           2-D space obtained with PCA; plt.show() is not called here, so
           the caller decides when the figure is displayed
    Returns none
    '''
    # Dominant topic of each row; on ties argmax picks the lowest column.
    topic_list = np.argmax(W, axis=1)
    Wsubset = W[topic_list == topic_number]

    # NOTE(review): a 2-component PCA presumably needs at least two rows in
    # Wsubset; a topic with fewer assigned tweets would fail here - confirm
    # whether callers can hit that case.
    pca = PCA(n_components=2)
    hflat = pca.fit_transform(Wsubset)

    # Low alpha so dense regions of overlapping points remain readable.
    plt.scatter(hflat[:, 0], hflat[:, 1], color=color, alpha=.1)
    plt.title('these are the {} tweets in topic # {}'.format(Wsubset.shape[0],
                                                             topic_number+1))
评论列表
文章目录