def run(count_path, out_path, smooth=0, cds=True, normalize=False, neg=1):
    """Build a PPMI matrix from a stored count matrix and write it to disk.

    The counts are loaded with ``create_representation("Explicit", ...)``
    (always with ``normalize=False``), row and column marginals are derived
    from them, and ``make_ppmi_mat`` produces the matrix that is exported.

    Args:
        count_path: Location handed to ``create_representation``.
        out_path: Output prefix. The matrix is written to
            ``out_path + ".bin"`` and the index to ``out_path + "-index.pkl"``.
        smooth: Relative smoothing factor. It is multiplied by the sum of
            all counts, and the product is added to every row and column
            total and passed on to ``make_ppmi_mat``.
        cds: When truthy, the column totals are raised to the power 0.75
            before being normalized (presumably context-distribution
            smoothing -- confirm against the project docs).
        normalize: Forwarded to ``make_ppmi_mat`` only; it does not affect
            how the counts are loaded.
        neg: Forwarded to ``make_ppmi_mat``.

    Returns:
        None. The results are the two files written under ``out_path``.
    """
    explicit = create_representation("Explicit", count_path, normalize=False)
    count_mat, word_index = explicit.m, explicit.wi

    # Scale the relative factor by the total count mass to get the absolute
    # amount added to each marginal.
    smooth_mass = count_mat.sum() * smooth

    # Marginal totals: per-row (axis 1) and per-column (axis 0) sums.
    row_totals = count_mat.sum(1) + smooth_mass
    col_totals = count_mat.sum(0) + smooth_mass
    if cds:
        # np.power is element-wise; keep it rather than ``**``, which would
        # mean matrix power if the sums come back as np.matrix.
        col_totals = np.power(col_totals, 0.75)

    # Normalize the totals so each set of marginals sums to one.
    row_marginals = row_totals / row_totals.sum()
    col_marginals = col_totals / col_totals.sum()

    ppmi = make_ppmi_mat(
        count_mat,
        row_marginals,
        col_marginals,
        smooth_mass,
        neg=neg,
        normalize=normalize,
    )

    # The pyximport hook is installed before importing sparse_io
    # (presumably a Cython module that needs the NumPy headers -- confirm).
    import pyximport
    pyximport.install(setup_args={"include_dirs": np.get_include()})
    from representations import sparse_io

    sparse_io.export_mat_eff(ppmi.row, ppmi.col, ppmi.data, out_path + ".bin")
    util.write_pickle(word_index, out_path + "-index.pkl")
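# Comment list (web-page residue, not part of the program)
# Table of contents (web-page residue, not part of the program)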