def knowsim_experiment(scope, scope_name, type_list, count, newLabels, tau=1, kNeighbors=10, label_num=5):
    """Build a combined similarity matrix over ``type_list`` and run the SSL experiment.

    For every entry of ``type_list`` a k-nearest-neighbour cosine graph is
    built from the typed TF vectors, scaled by
    ``exp(-tau * laplacian_score)`` and summed into a single matrix. That
    matrix is handed to ``SSLClassifier``, the repeated fixed-split
    experiment is run, and ``ssl.get_mean()`` is returned.

    Args:
        scope: Forwarded unchanged to ``SSLClassifier``.
        scope_name: Used to locate the pickled input
            ``data/local/<scope_name>.dmp`` and the split directory
            ``data/local/split/<scope_name>/``.
        type_list: Iterable whose items are each forwarded as the third
            argument of ``GraphGenerator.getTFVectorX``.
        count: Forwarded to ``SSLClassifier`` as ``classCount``.
        newLabels: Forwarded unchanged to ``SSLClassifier``.
        tau: Multiplier applied to the laplacian score inside
            ``exp(-tau * laplacian_score)``.
        kNeighbors: Number of highest-cosine columns kept per row when
            building each graph; also forwarded to
            ``generate_laplacian_score``.
        label_num: Forwarded to ``SSLClassifier`` as ``trainNumbers`` and
            used (zero-padded to three digits) in the split-file prefix.

    Returns:
        The value of ``ssl.get_mean()``.
    """
    split_path = 'data/local/split/' + scope_name + '/'
    # Pickle data is binary: open in 'rb' so loading does not depend on the
    # platform's text-mode newline handling.
    # NOTE(review): pickle must only be used on trusted files.
    with open('data/local/' + scope_name + '.dmp', 'rb') as f:
        hin = pk.load(f)

    repeats = 50

    # Word-only vectors: they fix the matrix size and feed the laplacian score.
    tf_param = {'word': True, 'entity': False, 'we_weight': 0.1}
    X_word, newIds, entityIds = GraphGenerator.getTFVectorX(hin, tf_param)
    n = X_word.shape[0]

    knowsim = sparse.lil_matrix((n, n))
    for t in type_list:
        tf_param = {'word': True, 'entity': True, 'we_weight': 0.1}
        X_typed, newIds, entityIds = GraphGenerator.getTFVectorX(hin, tf_param, t)

        # Make the similarity graph: for each row keep the kNeighbors columns
        # with the largest cosine similarity and store the edge symmetrically.
        # NOTE: np.argpartition raises if kNeighbors exceeds the row length.
        cosX = cosine_similarity(X_typed)
        graph = sparse.lil_matrix((n, n))
        for i in range(n):
            for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
                if j == i:
                    continue
                # Alternative weighting left by the original author:
                #   np.exp(-(1 - cosX[i, j]) / 0.03)
                weight = cosX[i, j]
                graph[i, j] = weight
                graph[j, i] = weight

        # Calculate laplacian scores from the graph's row sums.
        row_sum = graph.sum(axis=1)
        laplacian_score = generate_laplacian_score(row_sum, X_word, kNeighbors)

        # Add this type's similarity graph to the accumulated knowsim.
        knowsim = knowsim + np.exp(-tau * laplacian_score) * graph

    knowsim = knowsim.tocsr()
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid on Python 3.
    print('running lp')
    lp_param = {'alpha': 0.98, 'normalization_factor': 5}
    ssl = SSLClassifier(knowsim, newLabels, scope, lp_param, repeatTimes=repeats,
                        trainNumbers=label_num, classCount=count)
    ssl.repeatedFixedExperimentwithNewIds(
        pathPrefix=split_path + 'lb' + str(label_num).zfill(3) + '_',
        newIds=newIds)
    return ssl.get_mean()
# (web page residue) Comment list
# (web page residue) Article table of contents