BiCCA.py 文件源码-python代码片段

BiCCA.py 文件源码

python

阅读 90 收藏 0 点赞 0 评论 0

项目：WordEmbedding 作者: ziliwang 项目源码文件源码

def main(test, base, align, project, r):
    """Fit a CCA on aligned vectors and write projected test/base vectors.

    Steps, in order:
      1. Create ``<cwd>/<project>/tmp.<project>`` (and its parents).
      2. Call ``align_vec(base, test, align, tmp_dir)`` to obtain four
         vector collections: base, test, aligned-test and subset-test.
      3. Convert each to a matrix with ``dict_to_matrix`` and normalize it
         with ``preprocessing.normalize``.
      4. Fit a 200-component ``CCA`` on (subset-test, aligned-test).
      5. Project the full test matrix with ``x_weights_`` and the full base
         matrix with ``y_weights_`` via ``trans``.
      6. Hand both projections to ``output``.

    Args:
        test: Forwarded to ``align_vec`` and ``output``; presumably
            identifies the embedding set to be projected -- confirm
            against ``align_vec``.
        base: Forwarded to ``align_vec`` and ``output``; presumably
            identifies the reference embedding set -- confirm against
            ``align_vec``.
        align: Forwarded to ``align_vec``; presumably the word-alignment
            resource -- confirm against ``align_vec``.
        project: Name of the output directory created under the current
            working directory; also used in the temporary directory name.
        r: Not used by this function; kept so existing callers still work.
    """
    outdir = os.path.join(os.getcwd(), project)
    tmp_dir = os.path.join(outdir, 'tmp.{}'.format(project))
    # exist_ok avoids the check-then-create race of a separate exists() test
    # and also creates ``outdir`` as an intermediate directory.
    os.makedirs(tmp_dir, exist_ok=True)
    print('temporary dir: {}'.format(tmp_dir))

    basedWordVectors, testedWordVectors, aligned_test, subsetTest = \
        align_vec(base, test, align, tmp_dir)

    print('normalizing matrix')
    baseX = preprocessing.normalize(dict_to_matrix(basedWordVectors))
    testX = preprocessing.normalize(dict_to_matrix(testedWordVectors))
    aligned_testX = preprocessing.normalize(dict_to_matrix(aligned_test))
    subtestX = preprocessing.normalize(dict_to_matrix(subsetTest))

    # NOTE(review): n_components is hard-coded to 200; presumably the input
    # vectors have at least that many dimensions -- confirm.
    cca = CCA(n_components=200)
    print('computing CCA')
    # The subset-test matrix is the X side and the aligned matrix the Y side,
    # so x_weights_ is applied to test vectors and y_weights_ to base vectors.
    cca.fit(subtestX, aligned_testX)
    ccaed_test = trans(testX, cca.x_weights_)
    ccaed_base = trans(baseX, cca.y_weights_)

    output(outdir, test, ccaed_test, testedWordVectors)
    output(outdir, base, ccaed_base, basedWordVectors)