def compute_affinity(item):
    """Cluster a document's token embeddings by cosine distance.

    Parameters
    ----------
    item : tuple
        ``(text, f_idx, table_name, f_sql)`` — raw document text plus
        bookkeeping identifiers that are passed through to the return
        value (``table_name`` is unpacked but unused here).

    Returns
    -------
    tuple
        ``(f_idx, f_sql, data)`` where ``data`` maps
        ``"token_clf_index"`` to the model indices of the clustered
        tokens and ``"y_labels"`` to their cluster assignments.

    Raises
    ------
    ValueError
        If the document contains no tokens known to the model ``M``.

    Notes
    -----
    Relies on module-level globals defined elsewhere in this file:
    ``M`` (embedding model exposing ``__contains__``, ``__getitem__``
    and ``word2index``), ``damping`` and ``cluster_clf`` (clustering
    configuration).
    """
    text, f_idx, table_name, f_sql = item
    tokens = text.split()

    # Keep only the tokens the embedding model knows about.
    valid_tokens = [w for w in tokens if w in M]

    # Sort for determinism: set iteration order is not guaranteed, and a
    # stable token order makes the clustering output reproducible.
    labels = np.array(sorted(set(valid_tokens)))

    # Fail fast, before doing any embedding/index lookups.
    if not labels.size:
        msg = "Document has no valid tokens! This is problem."
        raise ValueError(msg)

    token_clf_index = np.array([M.word2index[w] for w in labels])

    V = np.array([M[w] for w in labels])
    DV = cdist(V, V, metric='cosine')

    # Cosine distances can dip "slightly" below zero due to floating-point
    # rounding; clamp so downstream clustering sees valid distances.
    DV[DV < 0] = 0

    cluster = cluster_clf(damping=damping)
    y_labels = cluster.fit_predict(DV)

    data = {
        "token_clf_index": token_clf_index,
        "y_labels": y_labels,
    }
    return f_idx, f_sql, data
# NOTE(review): removed stray non-Python page-scrape residue that
# followed this function ("评论列表" / "文章目录" — "comment list" /
# "article table of contents"); it was a SyntaxError, not code.