def _write_btm_distance_rows(doc_vecs, topic_btm_vec, dis_funcs, out_files, topic_num):
    """Write one CSV row of document-to-topic distances per document and metric.

    Args:
        doc_vecs: iterable yielding one document vector at a time; it is
            consumed lazily so a file-backed source is streamed.
        topic_btm_vec: container indexed by ``0 .. topic_num - 1`` that yields
            the vector of each topic.
        dis_funcs: list of ``(func, extra_args)`` pairs; each distance is
            computed as ``func(doc_vec, topic_vec, *extra_args)``.
        out_files: writable file objects, one per entry of ``dis_funcs`` and
            in the same order.
        topic_num: number of topics, i.e. number of columns in every row.
    """
    for doc_vec in doc_vecs:
        for (func, extra_args), out_f in zip(dis_funcs, out_files):
            vec = [func(doc_vec, topic_btm_vec[topic_id], *extra_args)
                   for topic_id in range(topic_num)]
            out_f.write('%s\n' % ','.join([str(num) for num in vec]))


def generate(config, argv):
    """Generate the BTM distance features between documents and topics.

    For each of seven distance metrics a file
    ``<dataset_pt>/vote_fs_btm_dis_<metric>.<data_name>.csv`` is written. It
    contains one line per document holding the 1999 comma-separated distances
    from that document's vector to every topic vector.

    Args:
        config: configuration object, read through ``config.get(section, option)``.
        argv: argument list; ``argv[0]`` is the data set name. For ``'offline'``
            the document vectors come from ``load_features_from_file`` restricted
            to the validation index; for any other name they are read line by
            line from ``<dataset_pt>/fs_btm_tw_cw.<data_name>.csv``.

    Raises:
        KeyError: if one of the distance functions is not defined in this
            module's namespace (checked before any output file is created).
    """
    # number of topics == number of distances written per line
    topic_num = 1999

    # load valid dataset index
    valid_index_fp = '%s/%s.offline.index' % (config.get('DIRECTORY', 'index_pt'),
                                              config.get('TITLE_CONTENT_CNN', 'valid_index_offline_fn'))
    valid_index = DataUtil.load_vector(valid_index_fp, 'int')
    # shift every index down by one (presumably 1-based on disk -- TODO confirm)
    valid_index = [num - 1 for num in valid_index]

    # load topic btm vec
    topic_btm_vec = load_topic_btm_vec(config)

    # offline / online
    data_name = argv[0]

    dis_func_names = ["cosine",
                      "cityblock",
                      "jaccard",
                      "canberra",
                      "euclidean",
                      "minkowski",
                      "braycurtis"]

    # Resolve each metric once from the module namespace instead of calling
    # eval() for every single distance. The functions are presumably the ones
    # from scipy.spatial.distance imported at module level -- confirm.
    # minkowski is the only metric called with an extra positional argument (3).
    module_namespace = globals()
    dis_funcs = []
    for dis_func_name in dis_func_names:
        extra_args = (3,) if 'minkowski' == dis_func_name else ()
        dis_funcs.append((module_namespace[dis_func_name], extra_args))

    btm_dis_feature_f = []
    try:
        # Open inside the try block so files that were already opened are
        # closed even when a later open() or the computation fails.
        for dis_func_name in dis_func_names:
            fn = 'vote_fs_btm_dis_%s' % dis_func_name
            btm_dis_feature_f.append(open('%s/%s.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'),
                                                          fn,
                                                          data_name), 'w'))

        if 'offline' == data_name:
            btm_tw_cw_features = load_features_from_file(config, 'fs_btm_tw_cw', data_name, valid_index)
            LogUtil.log('INFO', 'load_features_from_file, len=%d' % len(btm_tw_cw_features))
            # Rows are fetched by position: the container type is not visible
            # here, so plain iteration is not assumed to be equivalent.
            doc_vecs = (btm_tw_cw_features[line_id]
                        for line_id in range(len(btm_tw_cw_features)))
            _write_btm_distance_rows(doc_vecs, topic_btm_vec, dis_funcs,
                                     btm_dis_feature_f, topic_num)
        else:
            btm_vec_fp = '%s/fs_btm_tw_cw.%s.csv' % (config.get('DIRECTORY', 'dataset_pt'), data_name)
            with open(btm_vec_fp, 'r') as btm_vec_f:
                # NaN entries are replaced via np.nan_to_num before measuring
                doc_vecs = (np.nan_to_num(parse_feature_vec(line)) for line in btm_vec_f)
                _write_btm_distance_rows(doc_vecs, topic_btm_vec, dis_funcs,
                                         btm_dis_feature_f, topic_num)
    finally:
        for f in btm_dis_feature_f:
            f.close()
btm_dis_features.py 文件源码
python
阅读 31
收藏 0
点赞 0
评论 0
评论列表
文章目录