python类argsort()的实例源码

fisher_iris_visualization.py 文件源码 项目:blender-scripting 作者: njanakiev 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def PCA(data, num_components=None):
    # mean center the data
    data -= data.mean(axis=0)
    # calculate the covariance matrix
    R = np.cov(data, rowvar=False)
    # calculate eigenvectors & eigenvalues of the covariance matrix
    # use 'eigh' rather than 'eig' since R is symmetric,
    # the performance gain is substantial
    V, E = np.linalg.eigh(R)
    # sort eigenvalue in decreasing order
    idx = np.argsort(V)[::-1]
    E = E[:,idx]
    # sort eigenvectors according to same index
    V = V[idx]
    # select the first n eigenvectors (n is desired dimension
    # of rescaled data array, or dims_rescaled_data)
    E = E[:, :num_components]
    # carry out the transformation on the data using eigenvectors
    # and return the re-scaled data, eigenvalues, and eigenvectors
    return np.dot(E.T, data.T).T, V, E
spikesorting.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def __SubDoWavelets(self,waveforms):
        scales = 4
        dimensions = 10
        nspk,ls = waveforms.shape
        cc = pywt.wavedec(waveforms,"haar",mode="symmetric",level=scales,axis=-1)
        cc = np.hstack(cc)

        sd = list()
        for i in range(ls):
            test_data = cc[:,i]
            thr_dist = np.std(test_data,ddof=1)*3
            thr_dist_min = np.mean(test_data)-thr_dist
            thr_dist_max = np.mean(test_data)+thr_dist
            aux = test_data[(test_data>thr_dist_min)&(test_data<thr_dist_max)]
            if aux.size > 10:
                sd.append(self.__test_ks(aux))
            else:
                sd.append(0)
        ind = np.argsort(sd)
        ind = ind[::-1]
        coeff = ind[:dimensions]

        waveletspk = cc[:,coeff]
        return waveletspk
pyrandom.py 文件源码 项目:HousePricePredictionKaggle 作者: Nuwantha 项目源码 文件源码 阅读 36 收藏 0 点赞 0 评论 0
def get_feature_importance(list_of_features):
    n_estimators=10000
    random_state=0
    n_jobs=4
    x_train=data_frame[list_of_features]
    y_train=data_frame.iloc[:,-1]
    feat_labels= data_frame.columns[1:]
    forest = BaggingRegressor(n_estimators=n_estimators,random_state=random_state,n_jobs=n_jobs) 
    forest.fit(x_train,y_train) 
    importances=forest.feature_importances_ 
    indices = np.argsort(importances)[::-1]


    for f in range(x_train.shape[1]):
        print("%2d) %-*s %f" % (f+1,30,feat_labels[indices[f]],
                                        importances[indices[f]]))


    plt.title("Feature Importance")
    plt.bar(range(x_train.shape[1]),importances[indices],color='lightblue',align='center')
    plt.xticks(range(x_train.shape[1]),feat_labels[indices],rotation=90)
    plt.xlim([-1,x_train.shape[1]])
    plt.tight_layout()
    plt.show()
gui.py 文件源码 项目:spyking-circus 作者: spyking-circus 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def update_sort_idcs(self):
        # The selected points are sorted before all the other points -- an easy
        # way to achieve this is to add the maximum score to their score
        if self.current_order == 0:
            score = self.score_x
        elif self.current_order == 1:
            score = self.score_y
        elif self.current_order == 2:
            score = self.score_z
        else:
            raise AssertionError(self.current_order)
        score = score.copy()
        if len(self.selected_points):
            score[np.array(sorted(self.selected_points))] += score.max()

        self.sort_idcs = np.argsort(score)
gpUtils.py 文件源码 项目:MKLMM 作者: omerwe 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def removeTopPCs(X, numRemovePCs):  
    t0 = time.time()
    X_mean = X.mean(axis=0)
    X -= X_mean
    XXT = symmetrize(blas.dsyrk(1.0, X, lower=0))
    s,U = la.eigh(XXT)
    if (np.min(s) < -1e-4): raise Exception('Negative eigenvalues found')
    s[s<0]=0
    ind = np.argsort(s)[::-1]
    U = U[:, ind]
    s = s[ind]
    s = np.sqrt(s)

    #remove null PCs
    ind = (s>1e-6)
    U = U[:, ind]
    s = s[ind]

    V = X.T.dot(U/s)    
    #print 'max diff:', np.max(((U*s).dot(V.T) - X)**2)
    X = (U[:, numRemovePCs:]*s[numRemovePCs:]).dot((V.T)[numRemovePCs:, :])
    X += X_mean

    return X
translate.py 文件源码 项目:dl4mt-multi 作者: nyu-dl 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def _translate(seq, f_init, f_next, trg_eos_idx, src_sel, trg_sel,
               k, cond_init_trg, normalize, n_best, **kwargs):
    sample, score = gen_sample(
        f_init, f_next, x=numpy.array(seq).reshape([len(seq), 1]),
        eos_idx=trg_eos_idx, src_selector=src_sel, trg_selector=trg_sel,
        k=k, maxlen=3*len(seq), stochastic=False, argmax=False,
        cond_init_trg=cond_init_trg, **kwargs)
    if normalize:
        lengths = numpy.array([len(s) for s in sample])
        score = score / lengths
    if n_best == 1:
        sidx = numpy.argmin(score)
    elif n_best > 1:
        sidx = numpy.argsort(score)[:n_best]
    else:
        raise ValueError('n_best cannot be negative!')
    return sample[sidx], score[sidx]
__init__.py 文件源码 项目:BMASS 作者: OSU-slatelab 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def closestNeighbor(query, embedding_array, normed=False, top_k=1):
    '''Gets the index of the closest neighbor of embedding_array
    to the query point.  Distance metric is cosine.

    SLOW. DO NOT USE THIS FOR RAPID COMPUTATION.
    '''
    embedding_array = numpy.array(embedding_array)
    if not normed:
        embedding_array = numpy.array([
            (embedding_array[i] / numpy.linalg.norm(embedding_array[i]))
                for i in range(embedding_array.shape[0])
        ])

    ## assuming embeddings are unit-normed by this point;
    ## norm(query) is a constant factor, so we can ignore it
    dists = numpy.array([
        numpy.dot(query, embedding_array[i])
            for i in range(embedding_array.shape[0])
    ])
    sorted_ixes = numpy.argsort(-1 * dists)
    return sorted_ixes[:top_k]
mapped_struct.py 文件源码 项目:sharedbuffers 作者: jampp 项目源码 文件源码 阅读 34 收藏 0 点赞 0 评论 0
def testMerge(self, dtype=dtype):
            testarray1 = range(1,101)
            testarray2 = range(5,106)
            a = numpy.empty((100,2), dtype=dtype)
            b = numpy.empty((100,2), dtype=dtype)
            merged = numpy.empty((200,2), dtype=dtype)
            incompatible1 = numpy.empty((200,3), dtype=dtype)
            incompatible2 = numpy.empty(200, dtype=dtype)
            a[:,0] = numpy.arange(1,101)
            a[:,1] = numpy.arange(2,102)
            b[:,0] = numpy.arange(5,105)
            b[:,1] = numpy.arange(6,106)
            ref = numpy.concatenate([a,b])
            ref = ref[numpy.argsort(ref[:,0])]
            self.assertEqual(mapped_struct.index_merge(a, b, merged), 200)
            self.assertTrue((merged == ref).all())
            self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible1)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible1, merged)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, b, incompatible2)
            self.assertRaises(ValueError, mapped_struct.index_merge, a, incompatible2, merged)
colormap.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def __init__(self, pos, color, mode=None):
        """
        ===============     ==============================================================
        **Arguments:**
        pos                 Array of positions where each color is defined
        color               Array of RGBA colors.
                            Integer data types are interpreted as 0-255; float data types
                            are interpreted as 0.0-1.0
        mode                Array of color modes (ColorMap.RGB, HSV_POS, or HSV_NEG)
                            indicating the color space that should be used when
                            interpolating between stops. Note that the last mode value is
                            ignored. By default, the mode is entirely RGB.
        ===============     ==============================================================
        """
        self.pos = np.array(pos)
        order = np.argsort(self.pos)
        self.pos = self.pos[order]
        self.color = np.array(color)[order]
        if mode is None:
            mode = np.ones(len(pos))
        self.mode = mode
        self.stopsCache = {}
spikesorting.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def __loadChnTimeWave(self,f,selectChan):
        times = list()
        waveforms = list()
        spk_startswith = "spike_{0}".format(selectChan)
        for chn_unit in f["spikes"].keys():
            if chn_unit.startswith(spk_startswith):
                time = f["spikes"][chn_unit]["times"].value
                waveform = f["spikes"][chn_unit]["waveforms"].value
                times.append(time)
                waveforms.append(waveform)
        if times:
            times = np.hstack(times)
            waveforms = np.vstack(waveforms)
            sort_index = np.argsort(times)
            waveforms = waveforms[sort_index]
            times = times[sort_index]
            return times,waveforms
        else:
            return None,None
colormap.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def __init__(self, pos, color, mode=None):
        """
        ===============     ==============================================================
        **Arguments:**
        pos                 Array of positions where each color is defined
        color               Array of RGBA colors.
                            Integer data types are interpreted as 0-255; float data types
                            are interpreted as 0.0-1.0
        mode                Array of color modes (ColorMap.RGB, HSV_POS, or HSV_NEG)
                            indicating the color space that should be used when
                            interpolating between stops. Note that the last mode value is
                            ignored. By default, the mode is entirely RGB.
        ===============     ==============================================================
        """
        self.pos = np.array(pos)
        order = np.argsort(self.pos)
        self.pos = self.pos[order]
        self.color = np.array(color)[order]
        if mode is None:
            mode = np.ones(len(pos))
        self.mode = mode
        self.stopsCache = {}
spikesorting.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 39 收藏 0 点赞 0 评论 0
def __loadChnTimeWave(self,f,selectChan):
        times = list()
        waveforms = list()
        spk_startswith = "spike_{0}".format(selectChan)
        for chn_unit in f["spikes"].keys():
            if chn_unit.startswith(spk_startswith):
                time = f["spikes"][chn_unit]["times"].value
                waveform = f["spikes"][chn_unit]["waveforms"].value
                times.append(time)
                waveforms.append(waveform)
        if times:
            times = np.hstack(times)
            waveforms = np.vstack(waveforms)
            sort_index = np.argsort(times)
            waveforms = waveforms[sort_index]
            times = times[sort_index]
            return times,waveforms
        else:
            return None,None
spikesorting.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 48 收藏 0 点赞 0 评论 0
def __SubDoWavelets(self,waveforms):
        scales = 4
        dimensions = 10
        nspk,ls = waveforms.shape
        cc = pywt.wavedec(waveforms,"haar",mode="symmetric",level=scales,axis=-1)
        cc = np.hstack(cc)

        sd = list()
        for i in range(ls):
            test_data = cc[:,i]
            thr_dist = np.std(test_data,ddof=1)*3
            thr_dist_min = np.mean(test_data)-thr_dist
            thr_dist_max = np.mean(test_data)+thr_dist
            aux = test_data[(test_data>thr_dist_min)&(test_data<thr_dist_max)]
            if aux.size > 10:
                sd.append(self.__test_ks(aux))
            else:
                sd.append(0)
        ind = np.argsort(sd)
        ind = ind[::-1]
        coeff = ind[:dimensions]

        waveletspk = cc[:,coeff]
        return waveletspk
spikesorting.py 文件源码 项目:NeoAnalysis 作者: neoanalysis 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def __load_waveforms(self,selectChan,file_name):
        spk_startswith = "spike_{0}".format(selectChan)
        with hp.File(file_name,"r") as f:
            times = list()
            waveforms = list()
            for chn_unit in f["spikes"].keys():
                if chn_unit.startswith(spk_startswith):
                    tep_time = f["spikes"][chn_unit]["times"].value
                    waveform = f["spikes"][chn_unit]["waveforms"].value
                    times.append(tep_time)
                    waveforms.append(waveform)
            if times:
                times = np.hstack(times)
                waveforms = np.vstack(waveforms)
                sort_index = np.argsort(times)
                waveforms = waveforms[sort_index]
                return waveforms
            else:
                return None
a2_predict.py 文件源码 项目:text_classification 作者: brightmart 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def process_each_row_get_lable(row,vocabulary_index2word_label,vocabulary_word2index_label,result_list):
    """
    :param row: it is a list.length is number of labels. e.g. 2002
    :param vocabulary_index2word_label
    :param result_list
    :return: a lable
    """
    label_list=list(np.argsort(row))
    label_list.reverse()
    #print("label_list:",label_list) # a list,length is number of labels.
    for i,index in enumerate(label_list): # if index is not exists, and not _PAD,_END, then it is the label we want.
        #print(i,"index:",index)
        flag1=vocabulary_index2word_label[index] not in result_list
        flag2=index!=vocabulary_word2index_label[_PAD]
        flag3=index!=vocabulary_word2index_label[_END]
        if flag1 and flag2 and flag3:
            #print("going to return ")
            return vocabulary_index2word_label[index]

# write question id and labels to file system.
a2_predict_classification.py 文件源码 项目:text_classification 作者: brightmart 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def get_label_using_logits_batch(question_id_sublist, logits_batch, vocabulary_index2word_label, f, top_number=5):
    print("get_label_using_logits.shape:", np.array(logits_batch).shape) # (1, 128, 2002))
    for i, logits in enumerate(logits_batch):
        index_list = np.argsort(logits)[-top_number:]
        #print("index_list:",index_list)
        index_list = index_list[::-1]
        label_list = []
        for index in index_list:
            #print("index:",index)
            label = vocabulary_index2word_label[index]
            label_list.append(
                label)  # ('get_label_using_logits.label_list:', [u'-3423450385060590478', u'2838091149470021485', u'-3174907002942471215', u'-1812694399780494968', u'6815248286057533876'])
        # print("get_label_using_logits.label_list",label_list)
        write_question_id_with_labels(question_id_sublist[i], label_list, f)
    f.flush()

# get label using logits
a2_predict_classification.py 文件源码 项目:text_classification 作者: brightmart 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def process_each_row_get_lable(row,vocabulary_index2word_label,vocabulary_word2index_label,result_list):
    """
    :param row: it is a list.length is number of labels. e.g. 2002
    :param vocabulary_index2word_label
    :param result_list
    :return: a lable
    """
    label_list=list(np.argsort(row))
    label_list.reverse()
    #print("label_list:",label_list) # a list,length is number of labels.
    for i,index in enumerate(label_list): # if index is not exists, and not _PAD,_END, then it is the label we want.
        #print(i,"index:",index)
        flag1=vocabulary_index2word_label[index] not in result_list
        flag2=index!=vocabulary_word2index_label[_PAD]
        flag3=index!=vocabulary_word2index_label[_END]
        if flag1 and flag2 and flag3:
            #print("going to return ")
            return vocabulary_index2word_label[index]

# write question id and labels to file system.
a1_seq2seq_attention_predict.py 文件源码 项目:text_classification 作者: brightmart 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def process_each_row_get_lable(row,vocabulary_index2word_label,vocabulary_word2index_label,result_list):
    """
    :param row: it is a list.length is number of labels. e.g. 2002
    :param vocabulary_index2word_label
    :param result_list
    :return: a lable
    """
    label_list=list(np.argsort(row))
    label_list.reverse()
    #print("label_list:",label_list) # a list,length is number of labels.
    for i,index in enumerate(label_list): # if index is not exists, and not _PAD,_END, then it is the label we want.
        #print(i,"index:",index)
        flag1=vocabulary_index2word_label[index] not in result_list
        flag2=index!=vocabulary_word2index_label[_PAD]
        flag3=index!=vocabulary_word2index_label[_END]
        if flag1 and flag2 and flag3:
            #print("going to return ")
            return vocabulary_index2word_label[index]
solution_classes.py 文件源码 项目:risk-slim 作者: ustunb 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def filter_sort_unique(self, max_objval=float('Inf')):
        # filter
        if max_objval < float('inf'):
            good_idx = self.objvals <= max_objval
            self.objvals = self.objvals[good_idx]
            self.solutions = self.solutions[good_idx]

        if len(self.objvals) > 0:
            sort_idx = np.argsort(self.objvals)
            self.objvals = self.objvals[sort_idx]
            self.solutions = self.solutions[sort_idx]

            # unique
            b = np.ascontiguousarray(self.solutions).view(
                np.dtype((np.void, self.solutions.dtype.itemsize * self.P)))
            _, unique_idx = np.unique(b, return_index=True)
            self.objvals = self.objvals[unique_idx]
            self.solutions = self.solutions[unique_idx]
lattice_cpa.py 文件源码 项目:risk-slim 作者: ustunb 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def round_solution_pool(pool, constraints):

    pool.distinct().sort()
    P = pool.P
    L0_reg_ind = np.isnan(constraints['coef_set'].C_0j)
    L0_max = constraints['L0_max']
    rounded_pool = SolutionPool(P)

    for solution in pool.solutions:
        # sort from largest to smallest coefficients
        feature_order = np.argsort([-abs(x) for x in solution])
        rounded_solution = np.zeros(shape=(1, P))
        l0_norm_count = 0
        for k in range(0, P):
            j = feature_order[k]
            if not L0_reg_ind[j]:
                rounded_solution[0, j] = np.round(solution[j], 0)
            elif l0_norm_count < L0_max:
                rounded_solution[0, j] = np.round(solution[j], 0)
                l0_norm_count += L0_reg_ind[j]

        rounded_pool.add(objvals=np.nan, solutions=rounded_solution)

    rounded_pool.distinct().sort()
    return rounded_pool
acc_listener.py 文件源码 项目:tfplus 作者: renmengye 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def listen(self, results):
        score_out = results['score_out']
        y_gt = results['y_gt']
        sort_idx = np.argsort(score_out, axis=-1)
        idx_gt = np.argmax(y_gt, axis=-1)
        correct = 0
        count = 0
        for kk, ii in enumerate(idx_gt):
            sort_idx_ = sort_idx[kk][::-1]
            for jj in sort_idx_[:self.top_k]:
                if ii == jj:
                    correct += 1
                    break
            count += 1
        # self.log.info('Correct {}/{}'.format(correct, count))
        self.correct += correct
        self.count += count
        self.step = int(results['step'])
        # self.log.info('Step {}'.format(self.step))
        pass
skipthoughts.py 文件源码 项目:TAC-GAN 作者: dashayushman 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def nn(model, text, vectors, query, k=5):
    """
    Return the nearest neighbour sentences to query
    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [text[a] for a in sorted_args[:k]]
    print('QUERY: ' + query)
    print('NEAREST: ')
    for i, s in enumerate(sentences):
        print(s, sorted_args[i])
skipthoughts.py 文件源码 项目:how_to_convert_text_to_images 作者: llSourcell 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def nn(model, text, vectors, query, k=5):
    """
    Return the nearest neighbour sentences to query
    text: list of sentences
    vectors: the corresponding representations for text
    query: a string to search
    """
    qf = encode(model, [query])
    qf /= norm(qf)
    scores = numpy.dot(qf, vectors.T).flatten()
    sorted_args = numpy.argsort(scores)[::-1]
    sentences = [text[a] for a in sorted_args[:k]]
    print 'QUERY: ' + query
    print 'NEAREST: '
    for i, s in enumerate(sentences):
        print s, sorted_args[i]
utils.py 文件源码 项目:sand-glyphs 作者: inconvergent 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def _spatial_sort(glyph):
  from scipy.spatial.distance import cdist
  from numpy import argsort
  from numpy import argmin

  curr = argmin(glyph[:,0])
  visited = set([curr])
  order = [curr]

  dd = cdist(glyph, glyph)

  while len(visited)<len(glyph):
    row = dd[curr,:]

    for i in argsort(row):
      if row[i]<=0.0 or i==curr or i in visited:
        continue
      order.append(i)
      visited.add(i)
      break
  glyph[:,:] = glyph[order,:]
eval_metrics.py 文件源码 项目:answer-triggering 作者: jiez-osu 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def label_ranking_reciprocal_rank(label,  # [sent_num]
                                  preds): # [sent_num]
  """ Calcualting the reciprocal rank according to definition,
  """
  rank = np.argsort(preds)[::-1]

  #pos_rank = np.take(rank, np.where(label == 1)[0])
  #return np.mean(1.0 / pos_rank)

  if_find = False 
  pos = 0
  for r in rank:
      pos += 1
      if label[r] == 1:
          first_pos_r = pos
          if_find = True
          break

  assert(if_find)

  return 1.0 / first_pos_r
sf_kmeans.py 文件源码 项目:kmeans-service 作者: MAYHEM-Lab 项目源码 文件源码 阅读 40 收藏 0 点赞 0 评论 0
def _matrix_inverse(self, matrix):
        """
        Computes inverse of a matrix.
        """
        matrix = np.array(matrix)
        n_features = matrix.shape[0]
        rank = np.linalg.matrix_rank(matrix)

        if rank == n_features:
            return np.linalg.inv(matrix)
        else:
            # Matrix is not full rank, so use Hadi's technique to compute inverse
            # Reference: Ali S. Hadi (1992) "Identifying Multiple Outliers in Multivariate Data" eg. 2.3, 2.4
            eigenValues, eigenVectors = np.linalg.eig(matrix)
            eigenValues = np.abs(eigenValues)  # to deal with -0 values
            idx = eigenValues.argsort()[::-1]
            eigenValues = eigenValues[idx]
            eigenVectors = eigenVectors[:, idx]

            s = eigenValues[eigenValues != 0].min()
            w = [1 / max(e, s) for e in eigenValues]
            W = w * np.eye(n_features)

            return eigenVectors.dot(W).dot(eigenVectors.T)
pre_sumstats.py 文件源码 项目:PleioPred 作者: yiminghu 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def get_1000G_snps(sumstats, out_file):
    sf = np.loadtxt(sumstats,dtype=str,skiprows=1)
    h5f = h5py.File('ref/Misc/1000G_SNP_info.h5','r')
    rf = h5f['snp_chr'][:]
    h5f.close()
    ind1 = np.in1d(sf[:,1],rf[:,2])
    ind2 = np.in1d(rf[:,2],sf[:,1])
    sf1 = sf[ind1]
    rf1 = rf[ind2]
    ### check order ###
    if sum(sf1[:,1]==rf1[:,2])==len(rf1[:,2]):
        print 'Good!'
    else:
        print 'Shit happens, sorting sf1 to have the same order as rf1'
        O1 = np.argsort(sf1[:,1])
        O2 = np.argsort(rf1[:,2])
        O3 = np.argsort(O2)
        sf1 = sf1[O1][O3]
    out = ['hg19chrc snpid a1 a2 bp or p'+'\n']
    for i in range(len(sf1[:,1])):
        out.append(sf1[:,0][i]+' '+sf1[:,1][i]+' '+sf1[:,2][i]+' '+sf1[:,3][i]+' '+rf1[:,1][i]+' '+sf1[:,5][i]+' '+sf1[:,6][i]+'\n')
    ff = open(out_file,"w")
    ff.writelines(out)
    ff.close()
vis_corex.py 文件源码 项目:LinearCorex 作者: gregversteeg 项目源码 文件源码 阅读 30 收藏 0 点赞 0 评论 0
def plot_heatmaps(data, mis, column_label, cont, topk=30, prefix=''):
    cmap = sns.cubehelix_palette(as_cmap=True, light=.9)
    m, nv = mis.shape
    for j in range(m):
        inds = np.argsort(- mis[j, :])[:topk]
        if len(inds) >= 2:
            plt.clf()
            order = np.argsort(cont[:,j])
            subdata = data[:, inds][order].T
            subdata -= np.nanmean(subdata, axis=1, keepdims=True)
            subdata /= np.nanstd(subdata, axis=1, keepdims=True)
            columns = [column_label[i] for i in inds]
            sns.heatmap(subdata, vmin=-3, vmax=3, cmap=cmap, yticklabels=columns, xticklabels=False, mask=np.isnan(subdata))
            filename = '{}/heatmaps/group_num={}.png'.format(prefix, j)
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            plt.title("Latent factor {}".format(j))
            plt.yticks(rotation=0)
            plt.savefig(filename, bbox_inches='tight')
            plt.close('all')
            #plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=cont[:, j],
            #          outfile=prefix + '/relationships/group_num=' + str(j), latent=labels[:, j], alpha=0.1)
vis_corex.py 文件源码 项目:LinearCorex 作者: gregversteeg 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def plot_top_relationships(data, corex, labels, column_label, topk=5, prefix=''):
    dual = (corex.moments['X_i Y_j'] * corex.moments['X_i Z_j']).T
    alpha = dual > 0.04
    cy = corex.moments['ry']
    m, nv = alpha.shape
    for j in range(m):
        inds = np.where(alpha[j] > 0)[0]
        inds = inds[np.argsort(- dual[j][inds])][:topk]
        if len(inds) >= 2:
            if dual[j, inds[0]] > 0.1:
                factor = labels[:, j]
                title = '$Y_{%d}$' % j
            else:
                k = np.argmax(np.abs(cy[j]))
                if k == j:
                    k = np.argsort(-np.abs(cy[j]))[1]
                factor = corex.moments['X_i Z_j'][inds[0], j] * labels[:, j] + corex.moments['X_i Z_j'][inds[0], k] * labels[:, k]
                title = '$Y_{%d} + Y_{%d}$' % (j, k)
            plot_rels(data[:, inds], map(lambda q: column_label[q], inds), colors=factor,
                      outfile=prefix + '/relationships/group_num=' + str(j), title=title)
vis_corex.py 文件源码 项目:LinearCorex 作者: gregversteeg 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def trim(g, max_parents=False, max_children=False):
    for node in g:
        if max_parents:
            parents = list(g.successors(node))
            weights = [g.edge[node][parent]['weight'] for parent in parents]
            for weak_parent in np.argsort(weights)[:-max_parents]:
                g.remove_edge(node, parents[weak_parent])
        if max_children:
            children = g.predecessors(node)
            weights = [g.edge[child][node]['weight'] for child in children]
            for weak_child in np.argsort(weights)[:-max_children]:
                g.remove_edge(children[weak_child], node)
    return g


# Misc. utilities


问题


面经


文章

微信
公众号

扫码关注公众号