Python whiten() example source code

# Imports assumed by this snippet (not shown in the original listing):
import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq
from sklearn import cluster

def k_means_cluster_Predict(data_list, info):
    # pair the two coordinate lists into an (N, 2) point array
    array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1, len(info)))
    KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
    BIC = [compute_bic(kmeansi, array_diagnal) for kmeansi in KMeans]
    ks_picked = ks[BIC.index(max(BIC))]  # k with the highest BIC
    if ks_picked == 1:
        return [data_list]
    else:
        out = []
        std_rec = [np.std(data_list[0]), np.std(data_list[1])]  # scipy.std was removed in SciPy >= 1.0
        whitened = whiten(array_diagnal)
        centroids, distortion = kmeans(whitened, ks_picked)
        idx, _ = vq(whitened, centroids)
        for x in range(ks_picked):
            group1 = [[int(i) for i in array_diagnal[idx == x, 0]],
                      [int(i) for i in array_diagnal[idx == x, 1]]]
            out.append(group1)
        return out
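Both k_means_cluster_Predict above and k_means_cluster further down call a compute_bic(kmeans_model, X) helper that the listing never shows. A minimal sketch of such a helper, assuming the widely used spherical-Gaussian BIC approximation for fitted sklearn KMeans models; the formula and all names below are this sketch's own, not the original project's:

import numpy as np

def compute_bic(kmeans_model, X):
    """Approximate BIC of a fitted sklearn KMeans model on data X (sketch)."""
    centers = kmeans_model.cluster_centers_
    labels = kmeans_model.labels_
    m = kmeans_model.n_clusters
    N, d = X.shape
    n = np.bincount(labels, minlength=m)  # points per cluster
    if N <= m:
        return float('-inf')
    # pooled within-cluster variance under a spherical-Gaussian assumption
    cl_var = (1.0 / (N - m) / d) * sum(
        ((X[labels == i] - centers[i]) ** 2).sum() for i in range(m))
    const_term = 0.5 * m * np.log(N) * (d + 1)
    return np.sum([n[i] * np.log(n[i]) - n[i] * np.log(N)
                   - (n[i] * d / 2.0) * np.log(2 * np.pi * cl_var)
                   - ((n[i] - 1) * d / 2.0)
                   for i in range(m) if n[i] > 0]) - const_term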
# Assumes: from scipy.cluster import vq
def kmeans_numpy(d, headers, K, whiten=True):
    # assign to A the result of getting the data from your Data object
    A = d.get_data(headers)
    # assign to W the result of calling vq.whiten on A
    W = vq.whiten(A)
    # assign to codebook, bookerror the result of calling vq.kmeans with W and K
    codebook, bookerror = vq.kmeans(W, K)
    # assign to codes, error the result of calling vq.vq with W and the codebook
    codes, error = vq.vq(W, codebook)
    # return codebook, codes, and error
    return codebook, codes, error
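kmeans_numpy assumes a Data object exposing get_data(headers) that returns a 2-D numpy array. A hypothetical stand-in, purely to make the call runnable:

import numpy as np
from scipy.cluster import vq

class Data:
    """Toy stand-in for the Data object kmeans_numpy expects (illustrative only)."""
    def __init__(self, columns):
        self._cols = columns  # dict mapping header -> 1-D array
    def get_data(self, headers):
        return np.column_stack([self._cols[h] for h in headers])

d = Data({'x': np.random.rand(100), 'y': np.random.rand(100)})
codebook, codes, error = kmeans_numpy(d, ['x', 'y'], K=3)
print(codebook.shape)  # typically (3, 2): one whitened centroid per cluster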
# Assumes: import pickle; import numpy as np; from scipy.cluster import vq as sp
def vector_quantize(data_dict, vs, bins):
    codebooks = {}
    vq_data = {}
    for size in vs.keys():
        all_size_data = []
        for disease in vs[size]:
            all_size_data.extend(data_dict[disease])
        #whitened = sp.whiten(all_size_data)
        #codebooks[size] = sp.kmeans(whitened, bins)[0]
        codebooks[size] = sp.kmeans(np.asarray(all_size_data), bins)[0]
    pickle.dump(codebooks, open("all_codebooks.pkl", "wb"))
    for dis in data_dict.keys():
        n = len(data_dict[dis])
        m = len(data_dict[dis][0])
        # list(...) keeps this working under Python 3, where map is lazy
        vq_data[dis] = list(map(str, sp.vq(np.reshape(data_dict[dis], (n, m)),
                                           codebooks[len(data_dict[dis][0])])[0]))
    return vq_data
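vector_quantize keys its codebooks by feature-vector length, so diseases whose vectors share a length share a codebook. A hypothetical call, with data shapes invented for illustration:

import numpy as np

data_dict = {'flu':  np.random.rand(20, 4).tolist(),   # 20 vectors of length 4
             'cold': np.random.rand(15, 4).tolist()}
vs = {4: ['flu', 'cold']}                              # group diseases by vector length
coded = vector_quantize(data_dict, vs, bins=8)
print(coded['flu'][:5])                                # first five codeword indices, as strings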
def k_means_cluster(data_list):
    if max(data_list[0]) - min(data_list[0]) > 10 and max(data_list[1]) - min(data_list[1]) > 10:
        array_diagnal = np.array([[data_list[0][x], data_list[1][x]] for x in range(len(data_list[0]))])
        ks = list(range(1, min([5, len(data_list[0]) + 1])))
        KMeans = [cluster.KMeans(n_clusters=i, init="k-means++").fit(array_diagnal) for i in ks]
        KMeans_predict = [cluster.KMeans(n_clusters=i, init="k-means++").fit_predict(array_diagnal) for i in ks]
        BIC = []
        BIC_rec = []
        for x in ks:
            if KMeans_predict[x - 1].max() < x - 1:
                continue  # fewer than x distinct clusters were actually used
            else:
                BIC_i = compute_bic(KMeans[x - 1], array_diagnal)
                if abs(BIC_i) < 10**8:
                    BIC.append(BIC_i)
                    BIC_rec.append(x)
        #BIC = [compute_bic(kmeansi,array_diagnal) for kmeansi in KMeans]
        #ks_picked=ks[BIC.index(max(BIC))]
        ks_picked = BIC_rec[BIC.index(max(BIC))]
        if ks_picked == 1:
            return [data_list]
        else:
            out = []
            std_rec = [np.std(data_list[0]), np.std(data_list[1])]  # scipy.std was removed in SciPy >= 1.0
            whitened = whiten(array_diagnal)
            centroids, distortion = kmeans(whitened, ks_picked)
            idx, _ = vq(whitened, centroids)
            for x in range(ks_picked):
                group1 = [[int(i) for i in array_diagnal[idx == x, 0]],
                          [int(i) for i in array_diagnal[idx == x, 1]]]
                out.append(group1)
            return out
    else:
        return [data_list]
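Both clustering functions above feed the same whiten/kmeans/vq pipeline from scipy.cluster.vq. On toy data the flow looks like this (a self-contained sketch; the values are illustrative):

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

points = np.array([[1.0, 1.0], [1.2, 0.9],    # one tight group ...
                   [8.0, 8.1], [8.2, 7.9]])   # ... and another
scaled = whiten(points)                   # divide each column by its std dev
centroids, distortion = kmeans(scaled, 2)
labels, dists = vq(scaled, centroids)     # nearest-centroid index per point
print(labels)                             # e.g. [0 0 1 1] (cluster order may vary)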
# Assumes: import glob; import numpy as np; import scipy.io.wavfile as wav;
# from scipy.cluster.vq import whiten, kmeans2; mfcc e.g. from python_speech_features
def get_mfcc_feat(self):
    # creating codebook with all models
    mfcc_feats = None
    for filename in glob.iglob('../data/voices/*.wav'):
        print(filename)
        (rate, sig) = wav.read(filename)
        # MFCC Features. Each row corresponds to MFCC for a frame
        mfcc_person = mfcc(sig.astype(np.float64), rate)
        if mfcc_feats is None:
            mfcc_feats = mfcc_person
        else:
            mfcc_feats = np.concatenate((mfcc_feats, mfcc_person), axis=0)
    # Normalize the features
    whitened = whiten(mfcc_feats)
    self.codebook, labeled_obs = kmeans2(data=whitened, k=3)
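Once the codebook exists, vq can quantize new (whitened) frames against it. A sketch with synthetic frames standing in for stacked MFCC rows:

import numpy as np
from scipy.cluster.vq import whiten, kmeans2, vq

frames = np.random.rand(200, 13)              # stand-in for stacked MFCC rows
whitened = whiten(frames)
codebook, labels = kmeans2(data=whitened, k=3)
new_labels, _ = vq(whitened[:10], codebook)   # assign 10 frames to codewords
print(new_labels)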
def argparser():
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse
    ap = argparse.ArgumentParser()
    ap.add_argument('vectors', nargs=1, metavar='FILE', help='word vectors')
    ap.add_argument('-a', '--approximate', default=False, action='store_true',
                    help='filter by approximate similarity (with -t)')
    ap.add_argument('-i', '--min-index', default=0, type=int,
                    help='index of first word (default 0)')
    ap.add_argument('-M', '--metric', default=DEFAULT_METRIC,
                    choices=sorted(metrics.keys()),
                    help='distance metric to apply')
    ap.add_argument('-n', '--normalize', default=False, action='store_true',
                    help='normalize vectors to unit length')
    ap.add_argument('-r', '--max-rank', metavar='INT', default=None,
                    type=int, help='only consider r most frequent words')
    ap.add_argument('-t', '--threshold', metavar='FLOAT', default=None,
                    type=float, help='only output distances <= t')
    ap.add_argument('-T', '--tolerance', metavar='FLOAT', default=0.1,
                    type=float, help='approximation tolerance (with -a)')
    ap.add_argument('-w', '--whiten', default=False, action='store_true',
                    help='normalize features to unit variance')
    ap.add_argument('-W', '--words', default=False, action='store_true',
                    help='output words instead of indices')
    return ap
# Assumes: import logging; import wvlib; from scipy.cluster.vq import whiten
def process_options(args):
    options = argparser().parse_args(args)
    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    if options.approximate and not options.threshold:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')
    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)
    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()
    words, vectors = wv.words(), wv.vectors()
    if options.whiten:
        # whitening should be implemented in wvlib to support use together
        # with approximate similarity
        if options.approximate:
            raise NotImplementedError
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)
    return words, vectors, wv, options
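The --whiten option ultimately just rescales each feature column to unit variance: scipy.cluster.vq.whiten is equivalent to dividing by the per-column standard deviation, as this quick check shows.

import numpy as np
from scipy.cluster.vq import whiten

vectors = np.random.rand(1000, 50)
assert np.allclose(whiten(vectors), vectors / vectors.std(axis=0))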
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Takes in a Data object, a set of headers, and the number of clusters to create.
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''
    # assign to A the result of getting the data given the headers
    try:
        A = d.get_data(headers)
    except AttributeError:
        A = d  # d is already a matrix of data
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    codebook = kmeans_init(W, K, categories)
    # assign to codebook, codes, errors, the result of calling kmeans_algorithm with W and codebook
    codebook, codes, errors = kmeans_algorithm(W, codebook, metric)
    # return the codebook, codes, and representation error
    return codebook, codes, errors
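kmeans_init and kmeans_algorithm are defined elsewhere in the source project. A plausible minimal kmeans_init matching the docstring (per-category means when categories is given, otherwise K random rows); this is a guess at the interface, not the original code:

import numpy as np

def kmeans_init(W, K, categories=None):
    """Hypothetical initializer: per-category means if categories is given,
    otherwise K distinct rows of W chosen at random."""
    if categories is not None:
        cats = np.unique(np.asarray(categories).ravel())
        return np.vstack([W[np.asarray(categories).ravel() == c].mean(axis=0)
                          for c in cats])
    idx = np.random.choice(W.shape[0], size=K, replace=False)
    return W[idx]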
def cluster(matrix):
    whitened = whiten(matrix.todense())
    # for x in range(25, 40):
    #     means, distortion = kmeans(whitened, x)
    #     print distortion
    means, distortion = kmeans(whitened, 30)
    # pickle.dump(means, open('30means-' + sys.argv[1] + '.pkl', 'wb'))
    return means, distortion
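The commented-out loop above is an elbow search over k. Uncommented and made self-contained, it would look roughly like this (range and data are illustrative):

import numpy as np
from scipy.cluster.vq import whiten, kmeans

whitened = whiten(np.random.rand(500, 10))
for k in range(25, 40):
    means, distortion = kmeans(whitened, k)
    print(k, distortion)  # pick the k where distortion stops dropping sharply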
def kmeans_classify(features, shape, label=True, fill=False):
    """Run the k-means algorithm."""
    print("Starting kmeans")
    whitened = whiten(features)
    # seed with the per-feature min, mean and max as three initial centroids
    init = np.array((whitened.min(0), whitened.mean(0), whitened.max(0)))
    codebook, _ = kmeans(whitened, init)
    classified, _ = vq(whitened, codebook)
    print("Finished kmeans")
    return classified
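A hypothetical call on raster-like data, reshaping the returned labels back to the image grid; the feature layout and shape here are invented for illustration:

import numpy as np

features = np.random.rand(100 * 100, 4)             # e.g. 4 bands per pixel
classified = kmeans_classify(features, shape=(100, 100))
class_image = classified.reshape((100, 100))        # back to the image grid
print(class_image.shape)                            # (100, 100), labels typically in {0, 1, 2}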