python类whiten()的实例源码

Simple_function.py 文件源码 项目:vapor 作者: mills-lab 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def k_means_cluster_Predict(data_list,info):
    """Split paired coordinates into k-means clusters, choosing k by BIC.

    ``data_list`` holds two parallel sequences (``data_list[0]`` and
    ``data_list[1]``) that are paired up element-wise into 2-D points.
    Candidate cluster counts are ``1 .. len(info) - 1``; each candidate is
    fitted with ``cluster.KMeans`` and scored with ``compute_bic`` (defined
    elsewhere in the file), and the count with the highest score is picked.

    Returns a list of groups, each shaped like ``data_list`` (two lists of
    ints).  When a single cluster is chosen -- or when ``info`` yields no
    candidate counts at all -- the input is returned unchanged as the only
    group: ``[data_list]``.
    """
    array_diagnal=np.array([[data_list[0][x],data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1,len(info)))
    if not ks:
        # No candidate k to evaluate; max() over an empty score list would
        # raise, so fall back to "everything is one group".
        return [data_list]
    models = [cluster.KMeans(n_clusters = i, init="k-means++").fit(array_diagnal) for i in ks]
    BIC = [compute_bic(model,array_diagnal) for model in models]
    ks_picked=ks[BIC.index(max(BIC))]
    if ks_picked==1:
        return [data_list]

    # The final partition is recomputed on per-column variance-normalised
    # data; the groups themselves are reported in the original units.
    whitened = whiten(array_diagnal)
    centroids, _distortion = kmeans(whitened,ks_picked)
    idx,_= vq(whitened,centroids)
    out=[]
    for label in range(ks_picked):
        members = array_diagnal[idx==label]
        out.append([[int(v) for v in members[:,0]],[int(v) for v in members[:,1]]])
    return out
analysis.py 文件源码 项目:data-analysis 作者: ymohanty 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def kmeans_numpy(d, headers, K, whiten=True):
    """Cluster the selected columns of ``d`` with SciPy's k-means.

    Parameters:
        d: object exposing ``get_data(headers)``, which supplies the
            observation matrix.
        headers: passed through to ``d.get_data``.
        K: number of clusters (or an initial codebook) handed to
            ``vq.kmeans``.
        whiten: when true (the default) the data is variance-normalised with
            ``vq.whiten`` before clustering; when false it is used as-is.

    Returns:
        ``(codebook, codes, error)`` -- the codebook from ``vq.kmeans`` and
        the per-observation codes and distances from ``vq.vq``.
    """
    # fetch the observations from the Data object
    A = d.get_data(headers)

    # honour the caller's choice: previously the flag was accepted but the
    # data was whitened unconditionally
    W = vq.whiten(A) if whiten else A

    # build the codebook (the overall distortion is not returned)
    codebook, _bookerror = vq.kmeans(W, K)

    # assign every observation to its nearest codebook entry
    codes, error = vq.vq(W, codebook)

    return codebook, codes, error


# prep the k-means clustering algorithm by getting initial cluster means
train.py 文件源码 项目:GenefyHMM 作者: adit-39 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def vector_quantize(data_dict, vs, bins):
    """Build one k-means codebook per vector size and quantize every entry.

    Parameters:
        data_dict: maps a name to a list of equal-length observation vectors.
        vs: maps a vector size to the names in ``data_dict`` whose
            observations are pooled to train that size's codebook.
        bins: number of codebook entries, passed to ``sp.kmeans``.

    Side effect:
        The codebooks are pickled to ``all_codebooks.pkl`` in the current
        working directory.

    Returns:
        A dict mapping each name in ``data_dict`` to a list of its
        observations' codebook indices, rendered as strings.
    """
    codebooks = {}
    for size, diseases in vs.items():
        all_size_data = []
        for disease in diseases:
            all_size_data.extend(data_dict[disease])
        # NOTE: observations are clustered as-is; a whitening step that used
        # to precede this call was commented out in the original code.
        codebooks[size] = sp.kmeans(np.asarray(all_size_data), bins)[0]

    # Use a context manager so the file is flushed and closed promptly.
    with open("all_codebooks.pkl", "wb") as handle:
        pickle.dump(codebooks, handle)

    vq_data = {}
    for dis, samples in data_dict.items():
        n = len(samples)
        m = len(samples[0])
        # The codebook is looked up by the length of the first observation.
        codes = sp.vq(np.reshape(samples, (n, m)), codebooks[m])[0]
        # A real list (not a lazy map object) so callers can index and reuse it.
        vq_data[dis] = [str(code) for code in codes]
    return vq_data
Simple_function.py 文件源码 项目:vapor 作者: mills-lab 项目源码 文件源码 阅读 24 收藏 0 点赞 0 评论 0
def k_means_cluster(data_list):
    """Split paired coordinates into k-means clusters, choosing k by BIC.

    ``data_list`` holds two parallel sequences that are paired element-wise
    into 2-D points.  Clustering is attempted only when both sequences span
    more than 10 units; otherwise the input is returned as a single group.

    Candidate cluster counts run from 1 up to ``min(4, number of points)``.
    A candidate is skipped when its fit leaves the highest label unused or
    when its score from ``compute_bic`` (defined elsewhere in the file) has
    magnitude >= 10**8.  The surviving candidate with the highest score wins.

    Returns a list of groups, each shaped like ``data_list`` (two lists of
    ints).  ``[data_list]`` is returned when the data is too narrow, when one
    cluster is chosen, or when no candidate yields a usable score.
    """
    if not (max(data_list[0])-min(data_list[0])>10 and max(data_list[1])-min(data_list[1])>10):
        return [data_list]

    array_diagnal=np.array([[data_list[0][x],data_list[1][x]] for x in range(len(data_list[0]))])
    ks = list(range(1,min([5,len(data_list[0])+1])))
    BIC=[]
    BIC_rec=[]
    for k in ks:
        # Fit once and read the labels from that same model, so the
        # label check and the BIC score refer to the same
        # (randomly initialised) solution.
        model = cluster.KMeans(n_clusters = k, init="k-means++").fit(array_diagnal)
        if model.labels_.max()<k-1:
            continue
        BIC_i=compute_bic(model,array_diagnal)
        if abs(BIC_i)<10**8:
            BIC.append(BIC_i)
            BIC_rec.append(k)
    if not BIC:
        # Every candidate was filtered out; treat the data as one group
        # instead of failing on max() of an empty list.
        return [data_list]
    ks_picked=BIC_rec[BIC.index(max(BIC))]
    if ks_picked==1:
        return [data_list]

    # The final partition is recomputed on per-column variance-normalised
    # data; the groups themselves are reported in the original units.
    whitened = whiten(array_diagnal)
    centroids, _distortion = kmeans(whitened,ks_picked)
    idx,_= vq(whitened,centroids)
    out=[]
    for label in range(ks_picked):
        members = array_diagnal[idx==label]
        out.append([[int(v) for v in members[:,0]],[int(v) for v in members[:,1]]])
    return out
recognizer.py 文件源码 项目:speaker_recognizer_robot 作者: shrutiyer 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_mfcc_feat(self):
        """Build ``self.codebook`` from every WAV file in ``../data/voices``.

        MFCC features (one row per frame) are extracted from each file,
        stacked into one matrix, variance-normalised with ``whiten`` and
        clustered into 3 groups with ``kmeans2``.  The resulting centroids
        are stored on ``self.codebook``; nothing is returned.

        Raises:
            ValueError: if no ``.wav`` file is found.
        """
        per_file_feats = []

        for filename in glob.iglob('../data/voices/*.wav'):
            # Parenthesised form works as a statement on Python 2 and as a
            # function call on Python 3.
            print(filename)
            (rate, sig) = wav.read(filename)

            # MFCC Features. Each row corresponds to MFCC for a frame
            per_file_feats.append(mfcc(sig.astype(np.float64), rate))

        if not per_file_feats:
            raise ValueError('no .wav files found in ../data/voices')

        # Concatenate once instead of re-copying the growing matrix per file.
        mfcc_feats = np.concatenate(per_file_feats, axis=0)

        # Normalize the features
        whitened = whiten(mfcc_feats)
        self.codebook, _labels = kmeans2(data=whitened, k=3)
pairdist.py 文件源码 项目:BioNLP-2016 作者: cambridgeltl 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def argparser():
    """Build and return the command-line argument parser.

    Falls back to the bundled ``compat.argparse`` when the standard
    ``argparse`` module cannot be imported.  ``DEFAULT_METRIC`` and
    ``metrics`` are module-level names defined elsewhere in the file.
    """
    try:
        import argparse
    except ImportError:
        import compat.argparse as argparse

    ap=argparse.ArgumentParser()
    ap.add_argument('vectors', nargs=1, metavar='FILE', help='word vectors')
    ap.add_argument('-a', '--approximate', default=False, action='store_true',
                    help='filter by approximate similarity (with -t)')
    ap.add_argument('-i', '--min-index', default=0, type=int,
                    help='index of first word (default 0)')
    ap.add_argument('-M', '--metric', default=DEFAULT_METRIC, 
                    choices=sorted(metrics.keys()),
                    help='distance metric to apply')
    ap.add_argument('-n', '--normalize', default=False, action='store_true',
                    help='normalize vectors to unit length')
    ap.add_argument('-r', '--max-rank', metavar='INT', default=None, 
                    type=int, help='only consider r most frequent words')
    ap.add_argument('-t', '--threshold', metavar='FLOAT', default=None,
                    type=float, help='only output distances <= t')
    # Help text previously read "tolerace".
    ap.add_argument('-T', '--tolerance', metavar='FLOAT', default=0.1,
                    type=float, help='approximation tolerance (with -a)')
    ap.add_argument('-w', '--whiten', default=False, action='store_true',
                    help='normalize features to unit variance ')
    ap.add_argument('-W', '--words',  default=False, action='store_true',
                    help='output words instead of indices')
    return ap
pairdist.py 文件源码 项目:BioNLP-2016 作者: cambridgeltl 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def process_options(args):
    """Parse ``args``, validate the options and load the word vectors.

    Parameters:
        args: list of command-line arguments handed to the parser returned
            by ``argparser()``.

    Returns:
        ``(words, vectors, wv, options)`` -- the word list and vector matrix
        taken from the loaded ``wvlib`` object, that object itself, and the
        parsed options.  Vectors are unit-length normalised and/or whitened
        when the corresponding options are set.

    Raises:
        ValueError: for out-of-range numeric options, or ``--approximate``
            without a threshold.
        NotImplementedError: for ``--approximate`` with a non-cosine metric
            or combined with ``--whiten``.
    """
    options = argparser().parse_args(args)

    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.threshold is not None and options.threshold < 0.0:
        raise ValueError('threshold must be >= 0')
    if options.tolerance is not None and options.tolerance < 0.0:
        raise ValueError('tolerance must be >= 0')
    # Test for None explicitly: a threshold of 0.0 passes the range check
    # above and must not be mistaken for "no threshold given".
    if options.approximate and options.threshold is None:
        raise ValueError('approximate only makes sense with a threshold')
    if options.approximate and options.metric != 'cosine':
        raise NotImplementedError('approximate only supported for cosine')

    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)

    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()

    words, vectors = wv.words(), wv.vectors()

    if options.whiten:
        # whitening should be implemented in wvlib to support together with
        # approximate similarity
        if options.approximate:
            # Was misspelled "NotImplemenedError", which raised NameError.
            raise NotImplementedError(
                'whiten is not supported together with approximate')
        logging.info('normalize features to unit variance')
        vectors = whiten(vectors)

    return words, vectors, wv, options
analysis.py 文件源码 项目:data-analysis 作者: ymohanty 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def kmeans(d, headers, K, metric, whiten=True, categories=None):
    '''Run the project's own k-means on the selected data.

    d is either an object providing get_data(headers) or the data itself:
    if the get_data call raises AttributeError, d is used directly.
    When whiten is true the data is first passed through vq.whiten.
    The initial means come from kmeans_init(data, K, categories) and are
    refined by kmeans_algorithm(data, means, metric).

    Returns the codebook, the codes and the representation errors.
    '''
    # Accept either a Data-like object or a raw matrix.
    try:
        raw = d.get_data(headers)
    except AttributeError:
        raw = d

    # Optionally normalise before clustering.
    prepared = vq.whiten(raw) if whiten else raw

    # Seed the cluster means, then iterate to the final solution.
    initial_means = kmeans_init(prepared, K, categories)
    final_codebook, assignments, rep_errors = kmeans_algorithm(
        prepared, initial_means, metric)

    return final_codebook, assignments, rep_errors


# test function
compounds.py 文件源码 项目:deep_learning_chemical 作者: samocooper 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def cluster(matrix):
    """Whiten the densified ``matrix`` and run k-means with 30 clusters.

    ``matrix`` must provide ``todense()``.  Returns the ``(means,
    distortion)`` pair produced by ``kmeans``.
    """
    dense = matrix.todense()
    scaled = whiten(dense)

    # The cluster count is fixed at 30.
    centroids, total_distortion = kmeans(scaled, 30)
    return centroids, total_distortion
icetracking.py 文件源码 项目:mht 作者: jonatanolofsson 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def kmeans_classify(features, shape, label=True, fill=False):
    """Assign each feature row to one of three k-means clusters.

    The features are whitened, and k-means is seeded with three centroids:
    the per-column minimum, mean and maximum of the whitened data.  Returns
    the code assigned to each row by ``vq``.  ``shape``, ``label`` and
    ``fill`` are accepted but not used in this function.
    """
    print("Starting kmeans")
    scaled = whiten(features)
    seeds = np.array((scaled.min(0), scaled.mean(0), scaled.max(0)))
    codebook = kmeans(scaled, seeds)[0]
    assignments = vq(scaled, codebook)[0]
    print("Finished kmeans")
    return assignments


问题


面经


文章

微信
公众号

扫码关注公众号