python类cosine_similarity()的实例源码

engine.py 文件源码 项目:contentpy 作者: Joklost 项目源码 文件源码 阅读 42 收藏 0 点赞 0 评论 0
def _train(self, training_frame):
        """Compute item-to-item cosine similarities and store them in Redis.

        The ``description`` column of ``training_frame`` is vectorised with a
        hashing vectoriser (word 1- to 3-grams, English stop words), the full
        pairwise cosine-similarity matrix is computed, and for every row the
        most similar items are written to the sorted set ``SIMKEY % id``.

        Rows are addressed by position throughout, so the frame does not need
        a default ``0..n-1`` index.

        :param training_frame: DataFrame with at least the columns
            ``description`` and ``id``.
        """
        hashing_vectorizer = HashingVectorizer(analyzer="word", n_features=(2 ** 30),
                                               ngram_range=(1, 3), stop_words="english")
        training_hashing_matrix = hashing_vectorizer.fit_transform(training_frame["description"])

        self.log.info("starting kernel")
        start = time()
        cosine_similarities = cosine_similarity(training_hashing_matrix, training_hashing_matrix)
        self.log.info("finished kernel. this took {} s".format(time() - start))

        self.log.info("starting adding to redis database")
        start = time()
        total = len(training_frame.index)
        item_ids = training_frame["id"]
        processed = 0
        print_progress(processed, total, prefix="Progress:", suffix="Complete", bar_length=50)
        for position, (_, row) in enumerate(training_frame.iterrows()):
            scores = cosine_similarities[position]
            # Reverse slice of the ascending argsort: the 99 best matches, best first.
            similar_positions = scores.argsort()[:-100:-1]
            similar_items = [(scores[j], item_ids.iloc[j]) for j in similar_positions]

            # The first entry is dropped (presumably the item itself); the rest
            # is flattened to score, member, score, member, ... for zadd.
            flattened = [value for pair in similar_items[1:] for value in pair]
            self._r.zadd(self.SIMKEY % row['id'], *flattened)
            processed += 1
            print_progress(processed, total, prefix="Progress:", suffix="Complete", bar_length=50)
        self.log.info("finished adding {} rows to redis database. this took {} s".format(processed, time() - start))
test_main.py 文件源码 项目:FaceRecognitionProjects 作者: ForrestPi 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their fc7 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``fc7`` output is flattened to one 4096-wide row per image.
    Every feature matrix is sized from its own batch, so the two inputs may
    contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(data = X)
    feature1 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(data=X)
    feature2 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
system_main.py 文件源码 项目:FaceRecognitionProjects 作者: ForrestPi 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their fc7 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``fc7`` output is flattened to one 4096-wide row per image.
    Every feature matrix is sized from its own batch, so the two inputs may
    contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(data = X)
    feature1 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))

    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(data=X)
    feature2 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
utility.py 文件源码 项目:DeepLearn 作者: GauravBh1010tt 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def cos_sim(ind1,ind2=1999):
    """Print a retrieval score between the two saved test views.

    Loads ``test_v1.npy`` and ``test_v2.npy`` from the working directory,
    ranks the first ``ind2`` samples of view 2 by cosine similarity to each of
    the first ``ind1`` samples of view 1 and, for every query ``i``, adds
    ``1 / rank`` for each of the 7 best matches whose index lies in
    ``[i, i + 5)``.  The per-query top-7 indices and score are printed, then
    the sum of the scores divided by ``ind1``.

    :param ind1: number of view-1 samples to use as queries.
    :param ind2: number of view-2 samples to rank against.
    """
    view1 = np.load("test_v1.npy")[0:ind1]
    view2 = np.load("test_v2.npy")[0:ind2]
    # scikit-learn expects 2-D inputs, so every sample becomes a single row
    # and all candidates are stacked once outside the loop.
    candidate_rows = [np.reshape(x, (1, -1)) for x in view2]
    candidates = np.vstack(candidate_rows) if candidate_rows else None
    MAP=0
    for i,j in enumerate(view1):
        if candidates is None:
            scores = []
        else:
            scores = cosine_similarity(np.reshape(j, (1, -1)), candidates)[0]
        # Best score first; ties go to the larger index, as a descending
        # sort of (score, index) pairs would give.
        ranked = sorted(range(len(scores)), key=lambda p: (scores[p], p), reverse=True)
        t = ranked[0:7]
        AP=0
        for x,y in enumerate(t):
            if i <= y < i+5:
                # Float literal keeps this true division on Python 2 as well.
                AP+=1.0/(x+1)
        print(t)
        print(AP)
        MAP+=AP
    print('MAP is : ',MAP/ind1 if ind1 else 0.0)
lsa.py 文件源码 项目:nlp-lt 作者: minven 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def search_query(self, query):
        """
        Search for ``query`` and return the most related documents.

        The query is split on single spaces, known words are mapped to token
        ids, the query vector is the mean of their token representations and
        documents are ranked by cosine similarity to that vector.
        http://webhome.cs.uvic.ca/~thomo/svd.pdf

        :param query: query string; words missing from ``tokens_mapping`` are
            reported on stdout and skipped.
        :return: up to 5 entries of ``documents_mapping``, best match first;
            an empty list when no query word is known.
        """
        tokens_ids = []
        for word in query.split(" "):
            try:
                token_id = self.tokens_mapping[word]
            except KeyError:
                print("Token not found in tokens mapping dict")
            else:
                tokens_ids.append(token_id)

        # Without any known token the mean below would be NaN.
        if not tokens_ids:
            return []

        query_representation = np.mean(self.tokens_representation[tokens_ids,:], axis=0)
        # cosine_similarity needs a 2-D (1, n_features) query, not a 1-D vector.
        query_representation = np.asarray(query_representation).reshape(1, -1)
        similarities = cosine_similarity(query_representation, self.documents_representation)
        best_first = np.argsort(similarities[0])[::-1][:5]
        return [self.documents_mapping[index] for index in best_first]
link.py 文件源码 项目:watlink 作者: dustalov 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def emit(id):
    """Return ``(id, matches)`` for one entry of ``hctx``.

    For every hypernym listed under ``hctx[id]`` the sense with the highest
    similarity to the entry's context vector is kept if that similarity is
    positive.  ``matches`` lists ``(hypernym, sense id, similarity)`` for the
    ``args.k`` best candidates (all of them when ``args.k`` is 0), skipping
    hypernyms already present in ``synsets[id]``.  Unknown ids yield
    ``(id, {})``.
    """
    if id not in hctx:
        return (id, {})

    hvector = v.transform(hctx[id])
    candidates = Counter()

    for hypernym in hctx[id]:
        hsenses = Counter()
        for hid in index[hypernym]:
            sense_vector = v.transform(Counter(synsets[hid]))
            hsenses[hid] = sim(sense_vector, hvector).item(0)

        # Only the single best sense of each hypernym can become a candidate.
        for hid, cosine in hsenses.most_common(1):
            if cosine > 0:
                candidates[(hypernym, hid)] = cosine

    limit = len(candidates) if args.k == 0 else args.k
    matches = [
        (hypernym, hid, cosine)
        for (hypernym, hid), cosine in candidates.most_common(limit)
        if hypernym not in synsets[id]
    ]

    return (id, matches)
graphgenerator.py 文件源码 项目:semihin 作者: HKUST-KnowComp 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def generateCosineNeighborGraph(hin,kNeighbors=10,tf_param={'word':True, 'entity':False, 'we_weight':1}):
        """Build a symmetric nearest-neighbour graph from cosine similarity.

        Feature vectors are obtained from ``GraphGenerator.getTFVectorX`` and
        every row is linked to the ``kNeighbors`` rows with the highest cosine
        similarity (itself excluded).  Each selection adds 1 to both
        ``graph[i, j]`` and ``graph[j, i]``, so mutual neighbours end up with
        weight 2.

        :param hin: object handed to ``GraphGenerator.getTFVectorX``.
        :param kNeighbors: number of top-similarity rows selected per row; when
            it is not smaller than the number of rows, all rows are selected.
        :param tf_param: options forwarded to ``getTFVectorX``.  The default
            dict is shared between calls and must not be mutated.
        :return: ``(graph as scipy CSC matrix, newIds)``.
        """
        X, newIds, entIds = GraphGenerator.getTFVectorX(hin,param=tf_param)
        cosX = cosine_similarity(X)
        n = cosX.shape[0]
        graph = np.zeros((n,n))
        for i in range(n):
            if kNeighbors < n:
                # Negated so the partition puts the largest similarities first.
                nearest = np.argpartition(-cosX[i],kNeighbors)[:kNeighbors]
            else:
                # argpartition rejects kth >= n; every row qualifies anyway.
                nearest = range(n)
            for j in nearest:
                if j == i:
                    continue
                graph[i, j] += 1
                graph[j, i] += 1

        return sparse.csc_matrix(graph), newIds
graphgenerator.py 文件源码 项目:semihin 作者: HKUST-KnowComp 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def generateCosineNeighborGraphfromX(X, kNeighbors=10):
        """Build a symmetric nearest-neighbour graph from the rows of ``X``.

        Every row is linked to the ``kNeighbors`` rows with the highest cosine
        similarity (itself excluded).  Each selection adds 1 to both
        ``graph[i, j]`` and ``graph[j, i]``, so mutual neighbours end up with
        weight 2.

        :param X: feature matrix accepted by ``cosine_similarity``.
        :param kNeighbors: number of top-similarity rows selected per row; when
            it is not smaller than the number of rows, all rows are selected.
        :return: the graph as a scipy CSC matrix.
        """
        cosX = cosine_similarity(X)
        n = cosX.shape[0]
        graph = np.zeros((n, n))
        for i in range(n):
            if kNeighbors < n:
                # Negated so the partition puts the largest similarities first.
                nearest = np.argpartition(-cosX[i], kNeighbors)[:kNeighbors]
            else:
                # argpartition rejects kth >= n; every row qualifies anyway.
                nearest = range(n)
            for j in nearest:
                if j == i:
                    continue
                graph[i, j] += 1
                graph[j, i] += 1
        return sparse.csc_matrix(graph)
feature_grid_search.py 文件源码 项目:semihin 作者: HKUST-KnowComp 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def generate_laplacian_score_scalar(X_ent, X_word, kNeighbors):
    """Score the feature ``X_ent`` against a cosine neighbour graph of ``X_word``.

    A weighted graph is built by connecting every row of ``X_word`` to its
    ``kNeighbors`` most cosine-similar rows, and the function returns the ratio
    ``f~' L f~ / (f~' D f~ + 1e-10)`` where ``D`` holds the column sums of the
    graph on its diagonal, ``L = D - graph`` and ``f~`` is ``X_ent`` minus a
    ``D``-weighted mean.

    NOTE(review): the ``*`` products below only make sense as matrix products,
    so ``X_ent`` is presumably an ``(n, 1)`` ``np.matrix`` or scipy sparse
    column -- confirm against the callers.

    :param X_ent: feature column with one row per sample.
    :param X_word: feature matrix used to build the similarity graph.
    :param kNeighbors: number of highest-similarity rows linked to each row.
    :return: the score as a Python float.
    """
    # Generate cosine similarity graph
    n = X_ent.shape[0]
    cosX = cosine_similarity(X_word)
    graph = np.zeros((n, n))
    for i in range(n):
        # Indices of the kNeighbors largest similarities in row i.
        for j in np.argpartition(cosX[i], -kNeighbors)[-kNeighbors:]:
            if j == i:
                continue
            # Assigned (not accumulated) in both directions to keep it symmetric.
            graph[i, j] = cosX[i, j]
            graph[j, i] = cosX[i, j]

    # Diagonal matrix built from the column sums of the graph.
    D = sparse.diags([graph.sum(axis=0)], [0])
    L = D - graph
    # Subtract the D-weighted mean of X_ent from every entry.
    f_tilde = X_ent - (float(X_ent.transpose() * D * np.ones((n, 1))) / D.sum().sum()) * np.ones((n, 1))
    # The 1e-10 term keeps the denominator non-zero.
    score = float(f_tilde.transpose() * L * f_tilde) / float(f_tilde.transpose() * D * f_tilde + 1e-10)
    laplacian_score = score
    return laplacian_score
test_main.py 文件源码 项目:Face-recognition-test 作者: jiangwei1995910 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their pool5 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``pool5`` blob is flattened to one 25088-wide row per
    image.  Every feature matrix is sized from its own batch, so the two
    inputs may contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(blobs=['pool5'],data = X)
    feature1 = np.reshape(np.float64(out["pool5"]), (np.shape(X)[0], 25088))
    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(blobs=['pool5'],data=X)
    feature2 = np.reshape(np.float64(out['pool5']), (np.shape(X)[0], 25088))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
test.py 文件源码 项目:Face-recognition-test 作者: jiangwei1995910 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their pool5 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``pool5`` blob is flattened to one 25088-wide row per
    image.  Every feature matrix is sized from its own batch, so the two
    inputs may contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(blobs=['pool5'],data = X)
    feature1 = np.reshape(np.float64(out["pool5"]), (np.shape(X)[0], 25088))
    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(blobs=['pool5'],data=X)
    feature2 = np.reshape(np.float64(out['pool5']), (np.shape(X)[0], 25088))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
Read.py 文件源码 项目:Face-recognition-test 作者: jiangwei1995910 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def readFace(feature):
    """Return the Redis key whose stored face feature matches ``feature``.

    Every key of the local Redis server is treated as a list of pickled
    feature arrays.  The first element of every list is checked first; only
    if none matches is the second element of lists longer than one checked.
    A match is a cosine similarity above 0.46.  ``'unknow'`` is returned when
    nothing matches.
    """
    store = redis.Redis("localhost")
    names = store.keys("*")

    # NOTE(review): pickle.loads runs on whatever is stored in Redis; this is
    # only safe while the database content is fully trusted.
    for name in names:
        stored = pickle.loads(store.lindex(name, 0))
        score = pw.cosine_similarity(stored, feature)
        if score > 0.46:
            return name

    for name in names:
        if store.llen(name) <= 1:
            continue
        stored = pickle.loads(store.lindex(name, 1))
        score = pw.cosine_similarity(stored, feature)
        if score > 0.46:
            return name

    return 'unknow'





#????????
First_output.py 文件源码 项目:stc_ntcir12_code 作者: luochuwei 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def get_ranked_response(model, test_post_seg, candidate_list, similar_post_dic):
    """Score every candidate in place and return the candidates sorted by score.

    For each candidate the vectors of its post (slot 1) and response (slot 4)
    are stored in slots 2 and 5, the post/response similarity in slot 7, the
    query/response similarity in slot 8 and the weighted sum
    ``0.5 * slot6 + 1.5 * slot7 + 2 * slot8`` in slot 9.  The returned list is
    sorted ascending by the last slot of each candidate.
    """
    query_vec = get_sentence_vec(model, test_post_seg, candidate_list, similar_post_dic)
    for candidate in candidate_list:
        post_vec = get_sentence_vec(model, candidate[1], candidate_list, similar_post_dic)
        response_vec = get_sentence_vec(model, candidate[4], candidate_list, similar_post_dic)
        candidate[2], candidate[5] = post_vec, response_vec
        post_response_sim = float(cosine_similarity(post_vec, response_vec))
        query_response_sim = float(cosine_similarity(query_vec, response_vec))
        candidate[7], candidate[8] = post_response_sim, query_response_sim
        candidate[9] = candidate[6]*0.5+candidate[7]*1.5+candidate[8]*2
    return sorted(candidate_list, key=lambda entry: entry[-1])
query_strategy.py 文件源码 项目:ActiveBoundary 作者: MiriamHu 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def __init__(self, dataset, save_path_queries=None, **kwargs):
        # Set up the sampler: validate the required ``model`` keyword, derive
        # the HDF5 path for saved queries, train the model once and precompute
        # the cosine-similarity matrix of the unlabeled training features.
        #
        # dataset           -- forwarded to the parent constructor; read back
        #                      below as ``self.dataset`` (presumably set there).
        # save_path_queries -- base path for the query file.  The default None
        #                      is passed straight to os.path.join/normpath.
        # kwargs            -- must contain ``model``; forwarded in full
        #                      (``model`` included) to the parent constructor.
        #
        # Raises TypeError when ``model`` is missing or None.
        super(UncertaintyDenseSampling, self).__init__(dataset, **kwargs)

        # Popped only after the super() call, so the parent also received it.
        self.model = kwargs.pop('model', None)
        if self.model is None:
            raise TypeError(
                "__init__() missing required keyword-only argument: 'model'"
            )
        self.save_path_queries = save_path_queries
        # Joins the path with its own normalised form plus ".hdf5".
        self.save_path_queries_hdf5 = os.path.join(self.save_path_queries,
                                                   os.path.normpath(self.save_path_queries) + ".hdf5")
        # Refuse to overwrite an existing query file: report it and exit.
        if os.path.isfile(self.save_path_queries_hdf5):
            print "This file already exists %s" % self.save_path_queries_hdf5
            quit(0)
        self.model.train(self.dataset, first_time=True)
        unlabeled_train = self.dataset.get_unlabeled_train_data()["features"]
        print "Computing cosine similarities of", unlabeled_train.shape, "by", unlabeled_train.shape
        # Full pairwise similarity of the unlabeled samples with themselves.
        self.similarity_matrix = cosine_similarity(unlabeled_train, unlabeled_train)
wsd.py 文件源码 项目:mnogoznal 作者: nlpub 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def disambiguate_word(self, sentence, index):
        """Return the sense identifier that best fits the word at ``index``.

        The sentence is lemmatised and turned into a sentence vector; every
        sense listed in the inventory for the target lemma that has a dense
        vector is scored by its similarity to the sentence vector, and the
        best-scoring identifier is returned.  ``None`` is returned when the
        index has no lemma, no sentence vector can be built, or no sense has
        a vector.
        """
        super().disambiguate_word(sentence, index)

        lemmas = self.lemmatize(sentence)
        if index not in lemmas:
            return None

        sentence_vector = self.sensegram(lemmas.values())
        if sentence_vector is None:
            return None

        # Sense identifier -> similarity to the sentence vector.
        scores = Counter()
        for sense_id in self.inventory.index[lemmas[index]]:
            if self.dense[sense_id] is not None:
                scores[sense_id] = sim(sentence_vector, self.dense[sense_id]).item(0)

        if not scores:
            return None

        best_id, _ = scores.most_common(1)[0]
        return best_id
test_main.py 文件源码 项目:VGG_Face_Caffe_Model 作者: PatienceKai 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their fc7 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``fc7`` output is flattened to one 4096-wide row per image.
    Every feature matrix is sized from its own batch, so the two inputs may
    contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(data = X)
    feature1 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(data=X)
    feature2 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
system_main.py 文件源码 项目:VGG_Face_Caffe_Model 作者: PatienceKai 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def compar_pic(path1,path2):
    """Compare two image inputs by the cosine similarity of their fc7 features.

    Each path is loaded with ``read_image``, pushed through the module-level
    ``net`` and the ``fc7`` output is flattened to one 4096-wide row per image.
    Every feature matrix is sized from its own batch, so the two inputs may
    contain a different number of images.

    :param path1: location passed to ``read_image`` for the first input.
    :param path2: location passed to ``read_image`` for the second input.
    :return: matrix of pairwise cosine similarities (rows: images of
        ``path1``, columns: images of ``path2``).
    """
    global net
    # First input: forward pass, then one float64 feature row per image.
    X=read_image(path1)
    out = net.forward_all(data = X)
    feature1 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))

    # Second input: same steps, using this batch's own row count.
    X=read_image(path2)
    out = net.forward_all(data=X)
    feature2 = np.reshape(np.float64(out['fc7']), (np.shape(X)[0], 4096))
    # Higher cosine similarity means the feature vectors are closer.
    predicts=pw.cosine_similarity(feature1, feature2)
    return  predicts
test_pairwise.py 文件源码 项目:Parallel-SGD 作者: angadgill 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def test_cosine_similarity():
    """Check the cosine kernel against a linear kernel on L2-normalised data.

    Dense and CSR inputs are covered, each with and without an explicit ``Y``.
    """
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    cases = [(X, None), (X, Y), (Xcsr, None), (Xcsr, Ycsr)]
    for left, right in cases:
        # The cosine kernel must equal the linear kernel once the rows have
        # been scaled to unit L2 norm.
        cosine_kernel = pairwise_kernels(left, Y=right, metric="cosine")
        left_unit = normalize(left)
        right_unit = None if right is None else normalize(right)
        linear_kernel = pairwise_kernels(left_unit, Y=right_unit, metric="linear")
        assert_array_almost_equal(cosine_kernel, linear_kernel)
feature_selection_using_cmeans.py 文件源码 项目:FCM-Feature-Selection 作者: achyudhk 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def selecttop(CF, k):
    """
        Finds cosine similarity between SC and Wi and returns index of top features

        ``CF`` is normalised entry by entry into ``NCF``, ``SC`` holds the row
        sums of ``NCF``, and the rows of ``CF`` are ranked by their cosine
        similarity to ``SC``.  Two NaN checks (on ``SC`` and ``CF``) are
        printed as a diagnostic.

        :param CF: square 2-D array; both loops run over ``CF.shape[1]``.
        :param k: fraction of ``CF.shape[1]`` to keep.
        :return: indices of the ``int(k * CF.shape[1])`` most similar rows,
            best first.
    """
    size = CF.shape[1]
    NCF = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            # NOTE(review): CF[i,j] cancels out of this sum, leaving CF[j,j];
            # a Jaccard-style CF[i,i]+CF[j,j]-CF[i,j] may have been intended.
            # The arithmetic is left unchanged; confirm with the author.
            denominator = CF[i,j]+CF[j,j]-CF[i,j]
            if denominator != 0:
                NCF[i,j] = CF[i,j]/denominator

    SC = np.zeros(size)
    for i in range(size):
        SC[i] = np.sum(NCF[i,:])

    print(np.isnan(SC).any())
    print(np.isnan(CF).any())
    # cosine_similarity needs a 2-D (1, n_features) query, not a 1-D vector.
    cosim = cosine_similarity(SC.reshape(1, -1), CF)
    return (-cosim).argsort()[0][:int(k*size)]

#Loading CF matrix for each cluster
nlp_utils.py 文件源码 项目:search_relevance 作者: rmanak 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def cosine_sim(x, y):
    """Return the cosine similarity of two vectors, or 0.0 if it cannot be computed.

    Both inputs are reshaped to a single row before the comparison.  Any
    ordinary error (for example an input without ``reshape`` or mismatching
    lengths) yields ``0.0``; KeyboardInterrupt and SystemExit are no longer
    swallowed.
    """
    try:
        d = cosine_similarity(x.reshape(1,-1), y.reshape(1,-1))
        d = d[0][0]
    except Exception:
        # Best-effort feature: fall back to "no similarity" on bad input.
        d = 0.0
    return d
MTMKL.py 文件源码 项目:PersonalizedMultitaskLearning 作者: mitmedialab 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def setKernel(self, kernel_name, kernel_param):
        """Store the kernel name and select the matching kernel function.

        ``'rbf'`` selects scikit-learn's RBF kernel with ``gamma`` set to
        ``kernel_param``; any other name selects cosine similarity (a
        normalised dot product), which ignores ``kernel_param``.
        """
        def rbf(x1,x2):
            return rbf_kernel(x1,x2, gamma=kernel_param) # from sklearn

        def dot_product(x1,x2):
            return cosine_similarity(x1,x2) # from sklearn

        self.kernel_name = kernel_name
        self.internal_kernel_func = rbf if kernel_name == 'rbf' else dot_product
evaluate.py 文件源码 项目:docnade 作者: AYLIEN 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def closest_docs_by_index(corpus_vectors, query_vectors, n_docs):
    """Return, per query, the indices of the ``n_docs`` most similar corpus rows.

    Similarity is the cosine similarity between corpus and query vectors; the
    result has one row per query with the best match first.
    """
    similarity = pw.cosine_similarity(corpus_vectors, query_vectors)
    # Sort each query column ascending, then flip so the best rows come first.
    ranking = np.argsort(similarity, axis=0)[::-1]
    return np.array([ranking[:, q][0:n_docs] for q in range(len(query_vectors))])
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')
eval.py 文件源码 项目:image-classifier 作者: gustavkkk 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def compare_pic(self,feature1,feature2):
    """Return the pairwise 'cosine' distances from ``feature2`` rows to ``feature1`` rows.

    This is a distance, not a similarity: smaller values mean closer features.
    """
    return pw.pairwise_distances(feature2, feature1, metric='cosine')


问题


面经


文章

微信
公众号

扫码关注公众号