def _train(self, training_frame):
    """Build a cosine-similarity index over item descriptions and store the
    top similar items for every row in Redis sorted sets.

    Parameters
    ----------
    training_frame : pandas.DataFrame
        Must contain a 'description' text column and an 'id' column.
    """
    hashing_vectorizer = HashingVectorizer(analyzer="word", n_features=(2 ** 30),
                                           ngram_range=(1, 3), stop_words="english")
    training_hashing_matrix = hashing_vectorizer.fit_transform(training_frame["description"])
    self.log.info("starting kernel")
    start = time()
    cosine_similarities = cosine_similarity(training_hashing_matrix, training_hashing_matrix)
    self.log.info("finished kernel. this took {} s".format(time() - start))
    self.log.info("starting adding to redis database")
    start = time()
    done = 0
    total = len(training_frame.index)
    print_progress(done, total, prefix="Progress:", suffix="Complete", bar_length=50)
    for idx, row in training_frame.iterrows():
        # Indices of the ~99 most similar rows, best first (argsort is
        # ascending, so walk it backwards; the row itself ranks first).
        similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
        # FIX: the original comprehension reused `i` as its loop variable,
        # which under Python 2 leaks into the enclosing scope and clobbers
        # the progress counter; use a distinct name.
        similar_items = [(cosine_similarities[idx][sim_idx], training_frame['id'][sim_idx])
                         for sim_idx in similar_indices]
        # Drop the first entry (the row compared with itself) and flatten the
        # remaining (score, id) pairs into score1, id1, score2, id2, ...
        flattened = sum(similar_items[1:], ())
        self._r.zadd(self.SIMKEY % row['id'], *flattened)
        done += 1
        print_progress(done, total, prefix="Progress:", suffix="Complete", bar_length=50)
    self.log.info("finished adding {} rows to redis database. this took {} s".format(done, time() - start))
# Example source snippets using Python's cosine_similarity()
def compar_pic(path1, path2):
    """Compare two images by the cosine similarity of their fc7 features."""
    global net
    first = read_image(path1)
    # batch size taken from the first image set and reused for both reshapes
    test_num = np.shape(first)[0]

    def fc7(images):
        # forward pass; flatten the fc7 activations to (test_num, 4096)
        out = net.forward_all(data=images)
        return np.reshape(np.float64(out['fc7']), (test_num, 4096))

    feature1 = fc7(first)
    feature2 = fc7(read_image(path2))
    return pw.cosine_similarity(feature1, feature2)
def compar_pic(path1, path2):
    """Compare two images via cosine similarity of their fc7 features."""
    global net
    batch1 = read_image(path1)
    test_num = np.shape(batch1)[0]
    # fc7 activations of the first image set, flattened to (test_num, 4096)
    feature1 = np.reshape(np.float64(net.forward_all(data=batch1)['fc7']),
                          (test_num, 4096))
    batch2 = read_image(path2)
    # same extraction for the second image set
    feature2 = np.reshape(np.float64(net.forward_all(data=batch2)['fc7']),
                          (test_num, 4096))
    return pw.cosine_similarity(feature1, feature2)
def cos_sim(ind1, ind2=1999):
    """Compute a reciprocal-rank-weighted retrieval score between two views.

    For each of the first ``ind1`` rows of view 1, rank the first ``ind2``
    rows of view 2 by cosine similarity; among the top-7 matches, credit
    1/rank for every match whose index falls in the window [i, i+5).
    Prints per-row diagnostics (as before) and now also returns the mean.
    """
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    MAP = 0
    for i, j in enumerate(view1):
        val = []
        AP = 0
        for x in view2:
            # FIX: reshape to 2-D row vectors — sklearn's cosine_similarity
            # rejects 1-D input in modern versions.
            val.append(cosine_similarity(j.reshape(1, -1), x.reshape(1, -1))[0].tolist())
        # pair each similarity with its index so sorting keeps the origin
        val = [(q, p) for p, q in enumerate(val)]
        val.sort()
        val.reverse()
        t = [w[1] for w in val[0:7]]
        for x, y in enumerate(t):
            if y in range(i, i + 5):
                # FIX: float literal avoids integer division under Python 2
                AP += 1.0 / (x + 1)
        print(t)
        print(AP)
        MAP += AP
    print('MAP is : ', MAP / ind1)
    # FIX: the score was computed but never returned
    return MAP / ind1
def search_query(self, query):
    """
    Search for *query* and return the most related documents.

    Splits the query on spaces, maps each word to its token id (unknown
    words are reported and skipped), averages the token vectors into one
    query vector, then ranks documents by cosine similarity.
    http://webhome.cs.uvic.ca/~thomo/svd.pdf
    """
    def topN(similarities, N=5):
        # indices of the N largest similarities, best first
        return np.argsort(similarities)[::-1][:N]
    words = query.split(" ")
    tokens_ids = []
    for word in words:
        try:
            token_id = self.tokens_mapping[word]
        except KeyError:
            print("Token not found in tokens mapping dict")
        else:
            tokens_ids.append(token_id)
    query_representation = np.mean(self.tokens_representation[tokens_ids, :], axis=0)
    # FIX: np.mean over axis 0 yields a 1-D vector, which modern sklearn
    # rejects; reshape to a single-row 2-D array.
    query_representation = np.asarray(query_representation).reshape(1, -1)
    similarities = cosine_similarity(query_representation, self.documents_representation)
    topN_documents = [self.documents_mapping[index] for index in topN(similarities[0])]
    return topN_documents
def emit(id):
    """Return (id, matches): hypernym senses for *id* ranked by cosine score.

    For each hypernym word in *id*'s hypernym context, score that word's
    candidate senses by cosine similarity against the context vector, keep
    the single best positive sense per hypernym, then return the overall
    top-k (all of them when args.k == 0) that are not already in *id*'s
    synset.  Words without a hypernym context yield an empty result.
    """
    if id not in hctx:
        return (id, {})
    hvector = v.transform(hctx[id])
    candidates = Counter()
    for hypernym in hctx[id]:
        hsenses = Counter({hid: sim(v.transform(Counter(synsets[hid])), hvector).item(0)
                           for hid in index[hypernym]})
        for hid, cosine in hsenses.most_common(1):
            if cosine > 0:
                candidates[(hypernym, hid)] = cosine
    limit = len(candidates) if args.k == 0 else args.k
    matches = [(hypernym, hid, cosine)
               for (hypernym, hid), cosine in candidates.most_common(limit)
               if hypernym not in synsets[id]]
    return (id, matches)
def generateCosineNeighborGraph(hin, kNeighbors=10, tf_param=None):
    """Build a symmetric k-nearest-neighbor graph over the TF vectors of *hin*.

    Each node links to its kNeighbors most cosine-similar nodes; edges are
    counted in both directions, so a mutual choice accumulates to 2.

    Returns (graph, newIds) where graph is a sparse CSC adjacency matrix.
    """
    if tf_param is None:
        # FIX: avoid a shared mutable default argument; same effective default
        tf_param = {'word': True, 'entity': False, 'we_weight': 1}
    X, newIds, entIds = GraphGenerator.getTFVectorX(hin, param=tf_param)
    cosX = cosine_similarity(X)
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    for i in range(n):
        # indices of the kNeighbors largest similarities in row i
        for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
            if j == i:
                continue
            # increment (not set) so mutual neighbors accumulate weight
            graph[i, j] += 1
            graph[j, i] += 1
    # (dead timing code removed: tic/toc were computed but never used)
    return sparse.csc_matrix(graph), newIds
def generateCosineNeighborGraphfromX(X, kNeighbors=10):
    """Build a symmetric k-nearest-neighbor graph directly from a feature
    matrix X, using cosine similarity between rows.

    Returns a sparse CSC adjacency matrix where graph[i, j] counts how many
    times i and j selected each other as neighbors (0, 1 or 2).
    """
    cosX = cosine_similarity(X)
    n = cosX.shape[0]
    graph = np.zeros((n, n))
    for i in range(n):
        # indices of the kNeighbors largest similarities in row i
        for j in np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]:
            if j == i:
                continue
            # increment (not set) so mutual neighbors accumulate weight
            graph[i, j] += 1
            graph[j, i] += 1
    # (dead timing code removed: tic/toc were computed but never used)
    return sparse.csc_matrix(graph)
def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    """Laplacian score of the entity feature X_ent over a kNN graph built
    from cosine similarity of the rows of X_word.

    NOTE(review): the `*` products below behave as matrix multiplication,
    which implies X_ent is a numpy.matrix of shape (n, 1) — confirm against
    callers before refactoring.
    """
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        # connect i to its kNeighbors most similar rows (largest cosines)
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                continue
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]
    # degree matrix D and unnormalized graph Laplacian L = D - W
    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    # center f by its degree-weighted mean: f_tilde = f - (f'D1 / 1'D1) * 1
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    # Rayleigh quotient f'Lf / f'Df; the 1e-10 guards division by zero
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score
def compar_pic(path1, path2):
    """Compare two images by cosine similarity of their pool5 features."""
    global net
    imgs1 = read_image(path1)
    # batch size taken from the first image set, reused for both reshapes
    test_num = np.shape(imgs1)[0]

    def pool5(images):
        # forward pass up to pool5; flatten to (test_num, 25088)
        out = net.forward_all(blobs=['pool5'], data=images)
        return np.reshape(np.float64(out["pool5"]), (test_num, 25088))

    return pw.cosine_similarity(pool5(imgs1), pool5(read_image(path2)))
def compar_pic(path1, path2):
    """Cosine similarity between pool5 feature vectors of two images."""
    global net
    first_batch = read_image(path1)
    test_num = np.shape(first_batch)[0]
    # pool5 activations of the first image set, flattened to (test_num, 25088)
    out = net.forward_all(blobs=['pool5'], data=first_batch)
    feature1 = np.reshape(np.float64(out["pool5"]), (test_num, 25088))
    # same extraction for the second image set
    out = net.forward_all(blobs=['pool5'], data=read_image(path2))
    feature2 = np.reshape(np.float64(out['pool5']), (test_num, 25088))
    return pw.cosine_similarity(feature1, feature2)
def readFace(feature):
    """Look up *feature* among the faces stored in Redis.

    Pass 1 compares against slot 0 of every key; pass 2 compares against
    slot 1 of keys that have one.  A cosine similarity above 0.46 counts as
    a match and returns that key; otherwise returns 'unknow'.
    """
    r = redis.Redis("localhost")
    keys = r.keys("*")
    for key in keys:
        stored = pickle.loads(r.lindex(key, 0))
        if pw.cosine_similarity(stored, feature) > 0.46:
            return key
    for key in keys:
        if r.llen(key) > 1:
            stored = pickle.loads(r.lindex(key, 1))
            if pw.cosine_similarity(stored, feature) > 0.46:
                return key
    return 'unknow'
# NOTE: the original comment here was mojibake and could not be recovered
def get_ranked_response(model, test_post_seg, candidate_list, similar_post_dic):
    """Score each candidate response and return the candidates sorted by
    their combined rank score.

    Side effect: every candidate row c is mutated in place — c[2]/c[5] get
    the post/response sentence vectors, c[7]/c[8] the similarity scores,
    c[9] the combined rank score.

    NOTE(review): sorted() here is ascending on c[-1], so the HIGHEST-scoring
    candidate is LAST in the returned list — confirm callers expect that.
    """
    test_post_seg_vec = get_sentence_vec(model, test_post_seg, candidate_list, similar_post_dic)
    for c in candidate_list:
        # c[1] = candidate post text, c[4] = candidate response text
        c_p_vec = get_sentence_vec(model, c[1], candidate_list, similar_post_dic)
        c_r_vec = get_sentence_vec(model, c[4], candidate_list, similar_post_dic)
        c[2] = c_p_vec
        c[5] = c_r_vec
        # s2: post-vs-response coherence; s3: query-vs-response relevance
        # (float() assumes cosine_similarity returns a 1x1 result — confirm)
        s2 = float(cosine_similarity(c_p_vec, c_r_vec))
        s3 = float(cosine_similarity(test_post_seg_vec, c_r_vec))
        c[7] = s2
        c[8] = s3
        # rank_score = 1000*c[6]*c[7]*c[8]
        # weighted combination of the three component scores
        rank_score = c[6]*0.5+c[7]*1.5+c[8]*2
        c[9] = rank_score
    rank_candidate = sorted(candidate_list, key = lambda l: l[-1])
    return rank_candidate
def __init__(self, dataset, save_path_queries=None, **kwargs):
    """Initialize dense uncertainty sampling: train the model once and
    precompute the cosine-similarity matrix of the unlabeled pool.

    Raises TypeError when the required 'model' kwarg is missing.  Exits
    early when the target HDF5 file already exists.
    """
    super(UncertaintyDenseSampling, self).__init__(dataset, **kwargs)
    self.model = kwargs.pop('model', None)
    if self.model is None:
        raise TypeError(
            "__init__() missing required keyword-only argument: 'model'"
        )
    self.save_path_queries = save_path_queries
    # NOTE(review): joins the directory with its own normalized name +
    # ".hdf5" (e.g. "runs" -> "runs/runs.hdf5") — confirm intended layout.
    self.save_path_queries_hdf5 = os.path.join(self.save_path_queries,
                                               os.path.normpath(self.save_path_queries) + ".hdf5")
    if os.path.isfile(self.save_path_queries_hdf5):
        # FIX: Python 2 `print` statements are SyntaxErrors under Python 3;
        # converted to print-function calls.
        print("This file already exists %s" % self.save_path_queries_hdf5)
        quit(0)  # NOTE(review): sys.exit(0) is safer; quit() needs the site module
    self.model.train(self.dataset, first_time=True)
    unlabeled_train = self.dataset.get_unlabeled_train_data()["features"]
    print("Computing cosine similarities of", unlabeled_train.shape, "by", unlabeled_train.shape)
    self.similarity_matrix = cosine_similarity(unlabeled_train, unlabeled_train)
def disambiguate_word(self, sentence, index):
    """Return the synset id whose dense vector best matches the sentence
    vector for the word at *index*, or None when no decision can be made
    (word not lemmatized, no sentence vector, or no scored candidates).
    """
    super().disambiguate_word(sentence, index)
    lemmas = self.lemmatize(sentence)
    if index not in lemmas:
        return
    svector = self.sensegram(lemmas.values())  # sentence vector
    if svector is None:
        return
    # map synset identifiers to the cosine similarity value
    # (FIX: loop variable renamed from `id`, which shadowed the builtin)
    candidates = Counter({sense_id: sim(svector, self.dense[sense_id]).item(0)
                          for sense_id in self.inventory.index[lemmas[index]]
                          if self.dense[sense_id] is not None})
    if not candidates:
        return
    for sense_id, _ in candidates.most_common(1):
        return sense_id
def compar_pic(path1, path2):
    """Cosine similarity between fc7 feature vectors of two images."""
    global net
    images = read_image(path1)
    test_num = np.shape(images)[0]
    # fc7 activations of the first image set, flattened to (test_num, 4096)
    out = net.forward_all(data=images)
    feature1 = np.reshape(np.float64(out['fc7']), (test_num, 4096))
    # same extraction for the second image set
    images = read_image(path2)
    out = net.forward_all(data=images)
    feature2 = np.reshape(np.float64(out['fc7']), (test_num, 4096))
    return pw.cosine_similarity(feature1, feature2)
def compar_pic(path1, path2):
    """Compare two images through the cosine similarity of fc7 features."""
    global net
    batch = read_image(path1)
    test_num = np.shape(batch)[0]

    def extract_fc7(images):
        # run the network and flatten fc7 activations to (test_num, 4096)
        return np.reshape(np.float64(net.forward_all(data=images)['fc7']),
                          (test_num, 4096))

    feature1 = extract_fc7(batch)
    feature2 = extract_fc7(read_image(path2))
    return pw.cosine_similarity(feature1, feature2)
def test_cosine_similarity():
    """The cosine kernel must equal a linear kernel on L2-normalized data."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    # exercise dense and sparse inputs, with and without a second matrix
    cases = ((X, None), (X, Y),
             (csr_matrix(X), None), (csr_matrix(X), csr_matrix(Y)))
    for X_, Y_ in cases:
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        Y_ = normalize(Y_) if Y_ is not None else None
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2)
# Source file: feature_selection_using_cmeans.py
# Project: FCM-Feature-Selection (author: achyudhk)
# (scraped listing metadata: 20 reads, 0 stars, 0 likes, 0 comments)
def selecttop(CF, k):
    """
    Finds cosine similarity between SC and Wi and returns indices of the top
    k * n features (n = number of columns of CF), best first.

    CF is a square feature co-occurrence-style matrix; NCF normalizes it,
    SC sums each normalized row, and features are ranked by the cosine
    similarity of SC against CF.
    """
    NCF = np.zeros((CF.shape[1], CF.shape[1]))
    for i in range(CF.shape[1]):
        for j in range(CF.shape[1]):
            # NOTE(review): CF[i,j] + CF[j,j] - CF[i,j] reduces to CF[j,j];
            # a Jaccard-style denominator would be CF[i,i] + CF[j,j] - CF[i,j].
            # Left as-is to preserve behavior — confirm the intended formula.
            if (CF[i, j] + CF[j, j] - CF[i, j]) != 0:
                NCF[i, j] = CF[i, j] / (CF[i, j] + CF[j, j] - CF[i, j])
            else:
                NCF[i, j] = 0
    SC = np.zeros(CF.shape[1])
    for i in range(CF.shape[1]):
        SC[i] = np.sum(NCF[i, :])
    print(np.isnan(SC).any())
    print(np.isnan(CF).any())
    # FIX: modern sklearn rejects 1-D input; pass SC as a single-row matrix.
    cosim = cosine_similarity(SC.reshape(1, -1), CF)
    return (-cosim).argsort()[0][:int(k * CF.shape[1])]
#Loading CF matrix for each cluster
def cosine_sim(x, y):
    """Cosine similarity between two 1-D vectors, or 0.0 on failure.

    Inputs are reshaped to row vectors because sklearn's cosine_similarity
    requires 2-D arrays.
    """
    try:
        d = cosine_similarity(x.reshape(1, -1), y.reshape(1, -1))
        d = d[0][0]
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; keep the deliberate best-effort fallback but
        # let those propagate.
        d = 0.0
    return d
def setKernel(self, kernel_name, kernel_param):
    """Select the internal kernel function by name.

    'rbf' uses sklearn's rbf_kernel with gamma=kernel_param; any other name
    falls back to cosine similarity (a normalized dot product).
    """
    self.kernel_name = kernel_name
    if kernel_name == 'rbf':
        # kernel_param is captured by the closure as the RBF gamma
        self.internal_kernel_func = lambda x1, x2: rbf_kernel(x1, x2, gamma=kernel_param)
    else:
        self.internal_kernel_func = lambda x1, x2: cosine_similarity(x1, x2)
def closest_docs_by_index(corpus_vectors, query_vectors, n_docs):
    """For each query vector, return the indices of the n_docs most
    cosine-similar corpus vectors, best first, as a 2-D array."""
    sim = pw.cosine_similarity(corpus_vectors, query_vectors)
    # argsort each column ascending, then flip rows so best matches come first
    order = np.argsort(sim, axis=0)[::-1]
    docs = [order[:, col][0:n_docs] for col in range(len(query_vectors))]
    return np.array(docs)
def compare_pic(self, feature1, feature2):
    """Pairwise cosine *distance* matrix between the two feature sets
    (rows correspond to feature2, columns to feature1)."""
    return pw.pairwise_distances(feature2, feature1, 'cosine')
def compare_pic(self, feature1, feature2):
    """Cosine distance matrix: one row per feature2 entry, one column per
    feature1 entry."""
    distances = pw.pairwise_distances(feature2, feature1, 'cosine')
    return distances
def compare_pic(self, feature1, feature2):
    """Compare feature sets via pairwise cosine distance (not similarity)."""
    return pw.pairwise_distances(feature2, feature1, 'cosine')
def compare_pic(self, feature1, feature2):
    """Pairwise cosine distances between feature2 (rows) and feature1
    (columns)."""
    result = pw.pairwise_distances(feature2, feature1, 'cosine')
    return result
def compare_pic(self, feature1, feature2):
    """Return the cosine-distance matrix of feature2 against feature1."""
    return pw.pairwise_distances(feature2, feature1, 'cosine')
def compare_pic(self, feature1, feature2):
    """Cosine-distance comparison of two feature matrices
    (feature2 indexes the rows of the result)."""
    return pw.pairwise_distances(feature2, feature1, 'cosine')
def compare_pic(self, feature1, feature2):
    """Distance (not similarity): pairwise cosine distances with feature2
    as rows and feature1 as columns."""
    dist_matrix = pw.pairwise_distances(feature2, feature1, 'cosine')
    return dist_matrix
def compare_pic(self, feature1, feature2):
    """Compute pairwise cosine distances between the two feature sets."""
    return pw.pairwise_distances(feature2, feature1, 'cosine')