def main(test, base, align, project, r):
    """Fit CCA on aligned word vectors and write out both projected sets.

    Steps, in order:
      1. Ensure ``<cwd>/<project>/tmp.<project>`` exists.
      2. Call ``align_vec(base, test, align, tmp_dir)``, which yields four
         collections: the base vectors, the test vectors, and an
         aligned/subset pair used only for fitting.
      3. Convert each collection with ``dict_to_matrix`` and normalize it
         with ``preprocessing.normalize``.
      4. Fit a 200-component CCA on the (subset, aligned) pair.
      5. Apply the fitted x-weights to the full test matrix and the
         y-weights to the full base matrix via ``trans``.
      6. Hand both results to ``output`` together with ``outdir``.

    Args:
        test: Identifier of the test vector set; forwarded to ``align_vec``
            and ``output`` (presumably a file path -- confirm against
            those helpers).
        base: Identifier of the base vector set; forwarded the same way.
        align: Forwarded unchanged to ``align_vec``.
        project: Name of the output sub-directory created under the
            current working directory; also used in the temp dir name.
        r: Not used by this function; kept so existing callers still work.

    Raises:
        OSError: If the temporary directory cannot be created and does not
            already exist as a directory.
    """
    outdir = os.path.join(os.getcwd(), project)
    tmp_dir = os.path.join(outdir, 'tmp.{}'.format(project))

    # Create first and inspect the failure afterwards: a separate
    # exists()-then-makedirs() check can race with another process creating
    # the same directory, and would also silently accept a regular file
    # sitting at this path.
    try:
        os.makedirs(tmp_dir)
    except OSError:
        if not os.path.isdir(tmp_dir):
            raise
    print('temporary dir: {}'.format(tmp_dir))

    basedWordVectors, testedWordVectors, aligned_test, subsetTest = \
        align_vec(base, test, align, tmp_dir)

    print('normalizing matrix')
    baseX = preprocessing.normalize(dict_to_matrix(basedWordVectors))
    testX = preprocessing.normalize(dict_to_matrix(testedWordVectors))
    aligned_testX = preprocessing.normalize(dict_to_matrix(aligned_test))
    subtestX = preprocessing.normalize(dict_to_matrix(subsetTest))

    # NOTE(review): 200 components is hard-coded; the fit will presumably
    # reject inputs with fewer rows/columns than that -- confirm against
    # the data this is run on.
    cca = CCA(n_components=200)
    print('computing CCA')
    # The subset/aligned pair is only used to learn the weights; the full
    # matrices are what actually get projected below.
    cca.fit(subtestX, aligned_testX)

    # x-side weights came from the test-derived subset, y-side weights from
    # the aligned matrix, so they are applied to test and base respectively.
    ccaed_test = trans(testX, cca.x_weights_)
    ccaed_base = trans(baseX, cca.y_weights_)

    output(outdir, test, ccaed_test, testedWordVectors)
    output(outdir, base, ccaed_base, basedWordVectors)
评论列表
文章目录