Python normalize() usage examples
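
The snippets below are collected from open-source projects and show real-world calls to a normalize function. Most of them use sklearn.preprocessing.normalize, which rescales each sample (each row, by default) of a matrix to unit norm; a few use unicodedata.normalize for text or a normalize method defined by the project itself. As a point of reference, here is a minimal sketch of the sklearn call that most snippets rely on (the input values are made up for illustration and are not taken from any project below):

import numpy as np
from sklearn.preprocessing import normalize

# l2-normalize each row: [3, 4] becomes [0.6, 0.8] and [1, 0] stays [1.0, 0.0]
X = np.array([[3.0, 4.0],
              [1.0, 0.0]])
X_l2 = normalize(X, norm='l2', axis=1)
print(X_l2)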

analyze.py (project: visually-grounded-speech, author: gchrupala)
def test_homonym(H, sent, features, C=1.0):
    X_0 = features(matching(sent, H[0]))
    X_1 = features(matching(sent, H[1]))
    y_0 = numpy.zeros(len(X_0))
    y_1 = numpy.ones(len(X_1))
    X = normalize(numpy.vstack([X_0, X_1]), norm='l2')
    y = numpy.hstack([y_0, y_1])
    classifier = LogisticRegression(C=C)
    fold = StratifiedKFold(y, n_folds=10)
    score = []
    count = []
    for tr, te in fold:
        X_tr, X_te = X[tr], X[te]
        y_tr, y_te = y[tr], y[te]
        classifier.fit(X_tr, y_tr)
        score.append(sum(classifier.predict(X_te) == y_te))
        count.append(len(y_te))
    score = numpy.array(score, dtype='float')
    count = numpy.array(count, dtype='float')
    result = {'word1_count': len(y_0),
              'word2_count': len(y_1),
              'majority': 1.0 * max(len(y_0),len(y_1))/len(y),
              'kfold_acc': score/count }
    return result
recipe_cleanup.py (project: Flavor-Network, author: lingcheng99)
def make_tfidf(arr):
    '''input, numpy array with flavor counts for each recipe and compounds
    return numpy array adjusted as tfidf
    '''
    arr2 = arr.copy()
    N=arr2.shape[0]
    l2_rows = np.sqrt(np.sum(arr2**2, axis=1)).reshape(N, 1)
    l2_rows[l2_rows==0]=1
    arr2_norm = arr2/l2_rows

    arr2_freq = np.sum(arr2_norm>0, axis=0)
    arr2_idf = np.log(float(N+1) / (1.0 + arr2_freq)) + 1.0

    from sklearn.preprocessing import normalize
    tfidf = np.multiply(arr2_norm, arr2_idf)
    tfidf = normalize(tfidf, norm='l2', axis=1)
    print(tfidf.shape)
    return tfidf
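A minimal usage sketch for make_tfidf above (the counts are made up for illustration; np is numpy as imported at the top of the original module):

import numpy as np

# two recipes, three flavor compounds; values are hypothetical counts
counts = np.array([[2.0, 0.0, 1.0],
                   [0.0, 3.0, 0.0]])
tfidf = make_tfidf(counts)  # rows are l2-normalized tf-idf vectors, shape (2, 3)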
recipe_classification.py (project: Flavor-Network, author: lingcheng99)
def flavor_profile(df,ingr,comp,ingr_comp):
    sorted_ingredients = df.columns
    underscore_ingredients=[]
    for item in sorted_ingredients:
        underscore_ingredients.append(item.replace(' ','_'))

    print(len(underscore_ingredients), len(sorted_ingredients))

    ingr_total = ingr_comp.join(ingr,how='right',on='# ingredient id')
    ingr_total = ingr_total.join(comp,how='right',on='compound id')

    ingr_pivot = pd.crosstab(ingr_total['ingredient name'],ingr_total['compound id'])
    ingr_flavor = ingr_pivot[ingr_pivot.index.isin(underscore_ingredients)]

    df_flavor = df.values.dot(ingr_flavor.values)
    print(df.shape, df_flavor.shape)

    return df_flavor

#normalize flavor matrix with tfidf method
recipe_classification.py (project: Flavor-Network, author: lingcheng99)
def make_tfidf(arr):
    '''input, numpy array with flavor counts for each recipe and compounds
    return numpy array adjusted as tfidf
    '''
    arr2 = arr.copy()
    N=arr2.shape[0]
    l2_rows = np.sqrt(np.sum(arr2**2, axis=1)).reshape(N, 1)
    l2_rows[l2_rows==0]=1
    arr2_norm = arr2/l2_rows

    arr2_freq = np.sum(arr2_norm>0, axis=0)
    arr2_idf = np.log(float(N+1) / (1.0 + arr2_freq)) + 1.0

    from sklearn.preprocessing import normalize
    tfidf = np.multiply(arr2_norm, arr2_idf)
    tfidf = normalize(tfidf, norm='l2', axis=1)
    print(tfidf.shape)
    return tfidf
embedding.py (project: histwords, author: williamleif)
def __init__(self, path, words=[], dim=300, normalize=True, **kwargs):
        seen = []
        vs = {}
        for line in open(path):
            split = line.split()
            w = split[0]
            if words == [] or w in words:
                if len(split) != dim+1:
                    continue
                seen.append(w)
                vs[w] = np.array([float(x) for x in split[1:]], dtype='float32')
        self.iw = seen
        self.wi = {w:i for i,w in enumerate(self.iw)}
        self.m = np.vstack([vs[w] for w in self.iw])
        if normalize:
            self.normalize()
explicit.py (project: histwords, author: williamleif)
def get_subembed(self, word_list, normalize=False, restrict_context=True):
        """
        Gets subembedding.
        """
        w_set = set(self.iw)
        valid_w = [word for word in word_list if word in w_set]
        new_w_indices = np.array([self.wi[word] for word in valid_w])
        if restrict_context:
            c_set = set(self.ic)
            valid_c = [word for word in word_list if word in c_set]
            new_c_indices = np.array([self.ci[word] for word in valid_c])
            new_m = self.m[new_w_indices, :]
            new_m = new_m[:, new_c_indices]
        else:
            valid_c = self.ic
            new_m = self.m[new_w_indices, :]
        return Explicit(new_m, valid_w, valid_c, normalize=normalize)
loc2lang.py (project: geomdn, author: afshinrahimi)
def get_local_words(preds, vocab, NEs=[], k=50):
    """
    given the word probabilities over many coordinates,
    first normalize the probability of each word in different
    locations to get a probability distribution, then compute
    the entropy of the word's distribution over all coordinates
    and return the words that are low entropy and are not
    named entities.
    """
    #normalize the probabilities of each vocab word across locations, then compute entropy
    normalized_preds = normalize(preds, norm='l1', axis=0)
    entropies = stats.entropy(normalized_preds)
    sorted_indices = np.argsort(entropies)
    sorted_local_words = np.array(vocab)[sorted_indices].tolist()


    filtered_local_words = []
    NEset = set(NEs)
    for word in sorted_local_words:
        if word in NEset: continue
        filtered_local_words.append(word)
    return filtered_local_words[0:k]
DSC-Net-L2-EYaleB.py (project: Deep-subspace-clustering-networks, author: panji1990)
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5*(C + C.T)
    r = d*K + 1
    U, S, _ = svds(C,r,v0 = np.ones(C.shape[0]))
    U = U[:,::-1]    
    S = np.sqrt(S[::-1])
    S = np.diag(S)    
    U = U.dot(S)    
    U = normalize(U, norm='l2', axis = 1)       
    Z = U.dot(U.T)
    Z = Z * (Z>0)    
    L = np.abs(Z ** alpha) 
    L = L/L.max()   
    L = 0.5 * (L + L.T)    
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed',assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L
DSC-Net-L2-ORL.py (project: Deep-subspace-clustering-networks, author: panji1990)
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5*(C + C.T)
    r = min(d*K + 1, C.shape[0]-1)      
    U, S, _ = svds(C,r,v0 = np.ones(C.shape[0]))
    U = U[:,::-1]    
    S = np.sqrt(S[::-1])
    S = np.diag(S)    
    U = U.dot(S)    
    U = normalize(U, norm='l2', axis = 1)       
    Z = U.dot(U.T)
    Z = Z * (Z>0)    
    L = np.abs(Z ** alpha) 
    L = L/L.max()   
    L = 0.5 * (L + L.T)    
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed',assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L
DSC-Net-L2-COIL20.py (project: Deep-subspace-clustering-networks, author: panji1990)
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    n = C.shape[0]
    C = 0.5*(C + C.T)    
    C = C - np.diag(np.diag(C)) + np.eye(n,n) # for sparse C, this step will make the algorithm more numerically stable
    r = d*K + 1     
    U, S, _ = svds(C,r,v0 = np.ones(n))
    U = U[:,::-1] 
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis = 1)  
    Z = U.dot(U.T)
    Z = Z * (Z>0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T) 
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed', assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1
    return grp, L
DSC-Net-L2-COIL100.py (project: Deep-subspace-clustering-networks, author: panji1990)
def post_proC(C, K, d, alpha):
    # C: coefficient matrix, K: number of clusters, d: dimension of each subspace
    C = 0.5*(C + C.T)
    r = d*K + 1 
    U, S, _ = svds(C,r,v0 = np.ones(C.shape[0]))
    U = U[:,::-1] 
    S = np.sqrt(S[::-1])
    S = np.diag(S)
    U = U.dot(S)
    U = normalize(U, norm='l2', axis = 1)  
    Z = U.dot(U.T)
    Z = Z * (Z>0)
    L = np.abs(Z ** alpha)
    L = L/L.max()
    L = 0.5 * (L + L.T) 
    spectral = cluster.SpectralClustering(n_clusters=K, eigen_solver='arpack', affinity='precomputed',assign_labels='discretize')
    spectral.fit(L)
    grp = spectral.fit_predict(L) + 1 
    return grp, L
classifiers.py (project: avito-contest, author: fmilepe)
def rede_neural(X, y):
    print("Iniciando treinamento da Rede Neural")

    X2 = normalize(X)

    clf = MLPClassifier(hidden_layer_sizes=(100,50), activation='tanh', algorithm='adam', alpha=1e-5,
                        learning_rate='constant',tol=1e-8,learning_rate_init=0.0002,
                        early_stopping=True,validation_fraction=0.2)

    kf = KFold(len(y),n_folds=3)
    i = 0
    for train,test in kf:
        start = time.time()
        i = i + 1
        print("Treinamento",i)

        # splitting the dataset into train and test sets
        #X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=1)
        X_train, X_test, y_train, y_test = X2[train], X2[test], y[train], y[test]

        # fit
        clf.fit(X_train, y_train)
        print("score:",clf.score(X_test, y_test),"(",(time.time()-start)/60.0,"minutos )")
    return clf
other.py (project: StageDP, author: EastonWang)
def vectorize(features, vocab):
    """ Transform a features list into a numeric vector
        with a given vocab

    :type features: list
    :param features: list of feature names to count

    :type vocab: dict
    :param vocab: mapping from each feature name to its vector index
    """
    vec = lil_matrix((1, len(vocab)))

    for feat in features:
        try:
            fidx = vocab[feat]
            vec[0, fidx] += 1.0
        except KeyError:
            pass
    # Normalization
    vec = normalize(vec)
    return vec
STFIWF.py (project: 2016CCF_BDCI_Sougou, author: coderSkyChen)
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    if normalized == s:
        return s
    else:
        return ''.join([c for c in normalized if not unicodedata.combining(c)])
STFIWF.py (project: 2016CCF_BDCI_Sougou, author: coderSkyChen)
def _char_wb_ngrams(self, text_document):
        """Whitespace sensitive char-n-gram tokenization.

        Tokenize text_document into a sequence of character n-grams
        excluding any whitespace (operating only inside word boundaries)"""
        # normalize white spaces
        text_document = self._white_spaces.sub(" ", text_document)

        min_n, max_n = self.ngram_range
        ngrams = []
        for w in text_document.split():
            w = ' ' + w + ' '
            w_len = len(w)
            for n in xrange(min_n, max_n + 1):
                offset = 0
                ngrams.append(w[offset:offset + n])
                while offset + n < w_len:
                    offset += 1
                    ngrams.append(w[offset:offset + n])
                if offset == 0:  # count a short word (w_len < n) only once
                    break
        return ngrams
STFIWF.py (project: 2016CCF-sougou, author: prozhuchen)
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    if normalized == s:
        return s
    else:
        return ''.join([c for c in normalized if not unicodedata.combining(c)])
STFIWF.py (project: 2016CCF-sougou, author: prozhuchen)
def _char_wb_ngrams(self, text_document):
        """Whitespace sensitive char-n-gram tokenization.

        Tokenize text_document into a sequence of character n-grams
        excluding any whitespace (operating only inside word boundaries)"""
        # normalize white spaces
        text_document = self._white_spaces.sub(" ", text_document)

        min_n, max_n = self.ngram_range
        ngrams = []
        for w in text_document.split():
            w = ' ' + w + ' '
            w_len = len(w)
            for n in xrange(min_n, max_n + 1):
                offset = 0
                ngrams.append(w[offset:offset + n])
                while offset + n < w_len:
                    offset += 1
                    ngrams.append(w[offset:offset + n])
                if offset == 0:  # count a short word (w_len < n) only once
                    break
        return ngrams
word2vec.py (project: vec4ir, author: lgalke)
def fit(self, X_raw, y=None):
        cents = self.vect.fit_transform(X_raw)
        # print("Largest singular value: {:.2f}".format(
        #     np.linalg.norm(cents, ord=2)))
        # cents = all_but_the_top(cents, 1)
        # print("Largest singular value: {:.2f}".format(
        #     np.linalg.norm(cents, ord=2)))
        # print("Renormalizing")
        # normalize(cents, copy=False)
        # print("Largest singular value: {:.2f}".format(
        #     np.linalg.norm(cents, ord=2)))
        self.centroids = cents
        print(' FIT centroids shape', self.centroids.shape)

        self._y = y
        if self.matching:
            self.matching.fit(X_raw)
        else:
            self.nn.fit(cents)
test_lsi.py (project: FreeDiscovery, author: FreeDiscovery)
def test_lsi():

    cache_dir = check_cache()
    n_components = 2

    fe = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    uuid = fe.setup()
    fe.ingest(data_dir, file_pattern='.*\d.txt')

    lsi = _LSIWrapper(cache_dir=cache_dir, parent_id=uuid, mode='w')
    lsi_res, exp_var = lsi.fit_transform(n_components=n_components, alpha=1.0)
    assert lsi_res.components_.shape[0] == 5
    assert lsi_res.components_.shape[1] == fe.n_features_
    assert lsi._load_pars() is not None
    lsi._load_model()
    X_lsi = lsi._load_features()

    assert_allclose(normalize(X_lsi), X_lsi)

    lsi.list_models()
    lsi.delete()
test_vectorizer.py (project: FreeDiscovery, author: FreeDiscovery)
def test_feature_extraction_tokenization(analyzer, ngram_range, use_hashing):
    cache_dir = check_cache()
    use_hashing = (use_hashing == 'hashed')

    fe = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    uuid = fe.setup(analyzer=analyzer, ngram_range=ngram_range,
                    use_hashing=use_hashing)
    fe.ingest(data_dir, file_pattern='.*\d.txt')

    res2 = fe._load_features(uuid)
    assert isinstance(res2,  np.ndarray) or scipy.sparse.issparse(res2), "not an array {}".format(res2)

    assert np.isfinite(res2.data).all()

    assert_allclose(normalize(res2).data, res2.data)  # data is l2 normalized

    fe.delete()
test_vectorizer.py (project: FreeDiscovery, author: FreeDiscovery)
def test_feature_extraction_weighting(weighting,
                                      use_hashing):
    cache_dir = check_cache()

    use_hashing = (use_hashing == 'hashed')

    fe = FeatureVectorizer(cache_dir=cache_dir, mode='w')
    uuid = fe.setup(weighting=weighting, use_hashing=use_hashing)
    fe.ingest(data_dir, file_pattern='.*\d.txt')

    res2 = fe._load_features(uuid)
    assert isinstance(res2,  np.ndarray) or scipy.sparse.issparse(res2), \
        "not an array {}".format(res2)

    assert np.isfinite(res2.data).all()
    assert_allclose(normalize(res2).data, res2.data)  # data is l2 normalized

    fe.delete()
load_langmod.py (project: Msc_Multi_label_ZeroShot, author: thomasSve)
def load_pretrained():

    #glove_vec = ["glove_wiki_50","glove_wiki_150","glove_wiki_300"]
    glove_vec = ["glove_wiki_300"]
    #glove_vec = ["glove_wiki_50"]
    filename = 'glove_pretrained.h5'
    #import tensorflow as tf
    #sess = tf.InteractiveSession()

    features, words = load_h5py('glove_wiki_300',filename=root + glove_vec_fold + filename)
    filename = 'glove.h5'
    features = normalize(np.array(features), axis=1, norm='l2')
    with h5py.File(root + glove_vec_fold + filename, "w") as hf:
        hf.create_dataset(glove_vec[0], data=features)
        string_dt = h5py.special_dtype(vlen=str)
        hf.create_dataset(glove_vec[0] + "_words", data=words, dtype=string_dt)

    for vec in glove_vec:
        data, words = load_h5py(vec, filename=root + glove_vec_fold + "glove.h5")
        print(data.shape, words.shape)
        time.sleep(5)
word2vec_impl.py (project: Word2Vec, author: hashbangCoder)
def testWord2Vec(testWords,weights,num_display=3):
    ##Generate inverse word mapping for easy lookup
    invWordDict = {v: k for k, v in wordDict.items()}

    ## Normalize the trained weights for cosine similarity
    trainedWeights = normalize(weights,norm = 'l2', axis = 1)
    for word in testWords:
        try:
            embedding = trainedWeights[wordDict[word],:]
            prox = np.argsort(np.dot(embedding,trainedWeights.transpose())/np.linalg.norm(embedding))[-num_display:].tolist()       
            prox.reverse()
            print('Closest word vector (by cosine similarity) for %s : ' % word, [invWordDict[item] for item in prox])

        except KeyError:
            print('"%s" not found in the Trained Word Embeddings. Skipping...' % word)
            pass
word2vec_impl.py (project: Word2Vec, author: hashbangCoder)
def testWord2Vec(word_list,weights,num_display=3):
    ##Generate inverse word mapping for easy lookup
    invWordDict = {v: k for k, v in wordDict.items()}

    ## Normalize the trained weights for cosine similarity
    trainedWeights = normalize(weights,norm = 'l2', axis = 1)
    for word in word_list:
        try:
            embedding = trainedWeights[wordDict[word],:]
            prox = np.argsort(np.dot(embedding,trainedWeights.transpose())/np.linalg.norm(embedding))[-num_display:].tolist()       
            prox.reverse()
            print('Closest word vector (by cosine similarity) for %s : ' % word, [invWordDict[item] for item in prox])

        except KeyError:
            print('"%s" not found in the Trained Word Embeddings. Skipping...' % word)
            pass
B_preprocessing_clustering.py (project: blcf, author: willard-yuan)
def trainingPCA(features, n_components=256, whiten=True, pca_model_name=None):
    print('loaded features! {}'.format(features.shape))
    print(np.sqrt(sum(features[0,:]**2)))

    #print 'Features l2 normalization'
    #features = normalize(features)
    #print np.sqrt(sum(features[0,:]**2))

    print('Feature PCA-whitening')
    pca_model = PCA(n_components=n_components, whiten=whiten)
    features = pca_model.fit_transform(features)
    print(np.sqrt(sum(features[0,:]**2)))

    print('Features l2 normalization')
    features = normalize(features)
    print(np.sqrt(sum(features[0,:]**2)))

    if pca_model_name is not None:
        print('saving model...')
        check_path_file(pca_model_name, create_if_missing=True)
        save_obj(pca_model, pca_model_name)

    print('done! {}'.format(pca_model_name))

    return pca_model
ProGENI_simplified.py (project: ProGENI, author: KnowEnG)
def gen_network_matrix(num_nodes, net_df, node1, node2, weight, node2index):
    """Generates network adjacency matrix and normalizes it"""
    # Transform the first two columns of the DataFrame -- the nodes -- to their indexes
    net_df[node1] = net_df[node1].apply(lambda x: node2index[x])
    net_df[node2] = net_df[node2].apply(lambda x: node2index[x])
    # Create the sparse matrix
    network_matrix = sparse.csr_matrix((net_df[weight].values, (net_df[node1].values, net_df[node2].values)),
                                       shape=(num_nodes, num_nodes), dtype=float)
    # Make the adjacency matrix symmetric
    network_matrix = (network_matrix + network_matrix.T)
    network_matrix.setdiag(0)
    # Normalize the rows of network_matrix because we are multiplying vector by matrix (from left)
    network_matrix = normalize(network_matrix, norm='l1', axis=1)
    return(net_df, network_matrix)



###############################################################################
ProGENI.py (project: ProGENI, author: KnowEnG)
def gen_network_matrix(num_nodes, net_df, node1, node2, weight, node2index):
    """Generates network adjacency matrix and normalizes it"""
    # Transform the first two columns of the DataFrame -- the nodes -- to their indexes
    net_df[node1] = net_df[node1].apply(lambda x: node2index[x])
    net_df[node2] = net_df[node2].apply(lambda x: node2index[x])
    # Create the sparse matrix
    network_matrix = sparse.csr_matrix((net_df[weight].values, (net_df[node1].values, net_df[node2].values)),
                                       shape=(num_nodes, num_nodes), dtype=float)
    # Make the adjacency matrix symmetric
    network_matrix = (network_matrix + network_matrix.T)
    network_matrix.setdiag(0)
    # Normalize the rows of network_matrix because we are multiplying vector by matrix (from left)
    network_matrix = normalize(network_matrix, norm='l1', axis=1)
    return(net_df, network_matrix)



###############################################################################
readfile.py (project: TemporalNetworkEpidemics, author: andreaskoher)
def get_Temporal_Network(edges,firstday,lastday,directed,number_of_nodes,normalized):
    # Dictionary indexed by times from 0 to firstday-lastday: time: edge_list
    time_to_edges = {t: set() for t in range(0, lastday-firstday+1)}
    for u,v,t in edges:
        if u != v: # ignore self loops
            time_to_edges[t - firstday].add((u,v))
            if not directed:
                time_to_edges[t - firstday].add((v,u))
    # Initialize the temporal network
    Temporal_Network = {}
    for time, edges in time_to_edges.items():
        col = [u for u,v in edges]
        row = [v for u,v in edges]
        dat = [True for i in range(len(edges))]

        Adj_Matrix = sp.csr_matrix((dat,(row,col)),
                shape=(number_of_nodes, number_of_nodes), dtype=bool)
        # Assumption: an edge u -> v means p(t+1) = A p(t), i.e. A[v,u] = 1
        if normalized:
            Adj_Matrix = normalize(Adj_Matrix.transpose(), norm='l1', axis=1, copy=False).transpose()
            Temporal_Network[time] = Adj_Matrix
        else:
            Temporal_Network[time] = Adj_Matrix
    return Temporal_Network
BiCCA.py (project: WordEmbedding, author: ziliwang)
def main(test, base, align, project, r):
    outdir = os.path.join(os.getcwd(), project)
    tmp_dir = os.path.join(outdir, 'tmp.{}'.format(project))
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    print('temporary dir: {}'.format(tmp_dir))
    basedWordVectors, testedWordVectors, aligned_test, subsetTest = \
        align_vec(base, test, align, tmp_dir)
    test_cols = len(testedWordVectors)
    base_cols = len(basedWordVectors)
    print('normalizing matrix')
    baseX = preprocessing.normalize(dict_to_matrix(basedWordVectors))
    testX = preprocessing.normalize(dict_to_matrix(testedWordVectors))
    aligned_testX = preprocessing.normalize(dict_to_matrix(aligned_test))
    subtestX = preprocessing.normalize(dict_to_matrix(subsetTest))
    cca = CCA(n_components=200)
    print('computing CCA')
    cca.fit(subtestX, aligned_testX)
    ccaed_test = trans(testX, cca.x_weights_)
    ccaed_base = trans(baseX, cca.y_weights_)
    output(outdir, test, ccaed_test, testedWordVectors)
    output(outdir, base, ccaed_base, basedWordVectors)
STFIWF.py (project: 2016_CCFsougou, author: dhdsjy)
def strip_accents_unicode(s):
    """Transform accentuated unicode symbols into their simple counterpart

    Warning: the python-level loop and join operations make this
    implementation 20 times slower than the strip_accents_ascii basic
    normalization.

    See also
    --------
    strip_accents_ascii
        Remove accentuated char for any unicode symbol that has a direct
        ASCII equivalent.
    """
    normalized = unicodedata.normalize('NFKD', s)
    if normalized == s:
        return s
    else:
        return ''.join([c for c in normalized if not unicodedata.combining(c)])

