textrank_fujun.py 文件源码-python代码片段

textrank_fujun.py 文件源码

python

阅读 34 收藏 0 点赞 0 评论 0

项目：YelpDataChallenge 作者: fujunswufe 项目源码文件源码

def cosine_similarity_self(A):
    similarity = np.dot(A, A.T)
    square_mag = np.diag(similarity)
    inv_square_mag = 1 / square_mag
    inv_square_mag[np.isinf(inv_square_mag)] = 0
    inv_mag = np.sqrt(inv_square_mag)
    cosine = similarity * inv_mag
    cosine = cosine.T * inv_mag
    return cosine

# document should be a list of sentences
# method = "word2vec", "lda", "tfidf"
# def extraction(document, method="rawText"):
#
#     # graph = build_graph(document, method)  # document is a list of sentences
#
#     calculated_page_rank = networkx.pagerank(graph, weight="weight")
#
#     # most important sentences in descending order of importance
#     sentences = sorted(calculated_page_rank, key=calculated_page_rank.get, reverse=False)
#
#     return sentences[0:4]