def get_articles_by_distance(article, corpus, n=25):
    """Rank corpus rows by cosine distance to one article and return the closest.

    Args:
        article: Row from the articles DataFrame. Its ``'index'`` entry is
            used as the row position of the article inside ``corpus``.
        corpus: Two-dimensional matrix that exposes ``shape`` and supports
            ``corpus[i, :]`` row selection. Presumably one feature vector per
            article -- verify against the caller.
        n: Upper bound on the number of rows returned. Defaults to 25, the
            value that was previously hard-coded.

    Returns:
        A DataFrame with the columns ``index`` (row position in ``corpus``)
        and ``cosine_distance``, sorted by ascending distance and truncated
        to the first ``n`` rows. The query article is not excluded from the
        candidates, so its own row is part of the ranking.
    """
    n_rows = corpus.shape[0]
    # Keep the caller's row and the looked-up vector under separate names
    # instead of rebinding the ``article`` parameter.
    query_vector = corpus[article['index'], :]

    pairs = (
        (row, cosine_distance(query_vector, corpus[row, :]))
        for row in range(n_rows)
    )
    # The two-field structured dtype yields the default field names
    # 'f0' and 'f1', which are renamed below.
    distances = np.fromiter(pairs, dtype='uint,float', count=n_rows)

    ranked = (
        pd.DataFrame(distances)
        .rename(columns={'f0': 'index', 'f1': 'cosine_distance'})
        .sort_values(by='cosine_distance')
    )
    return ranked[0:n]
评论列表
文章目录