def deviation_plot(rp, variable_name, slope_cutoff=1, average_cutoff = 2.):
    """Prepare the 'average' and 'slope' panels of ``rp`` and plot them.

    Each panel is obtained from ``rp.value_panel``, every row is shifted by
    its own median, the values inside each row are sorted in place, and the
    rows are then ordered from the largest to the smallest (max - min) range.

    Args:
        rp: object exposing ``value_panel(name, types=[...])`` and ``dataset``.
        variable_name: name forwarded to ``value_panel`` and ``_multiplot``.
        slope_cutoff: passed as ``left_vmax`` (and negated as ``left_vmin``).
        average_cutoff: passed as ``right_vmax`` (and negated as ``right_vmin``).

    Returns:
        Whatever ``_multiplot`` returns.
    """
    def _centred_rows_by_range(kind):
        # assumes value_panel returns a 2-D ndarray -- TODO confirm
        panel = rp.value_panel(variable_name, types=[kind])
        # Subtract each row's median (transpose so the medians broadcast per row).
        panel = (panel.T - np.median(panel, axis=1)).T
        # In-place sort of the values within every row.
        panel.sort()
        spread = np.max(panel, axis=1) - np.min(panel, axis=1)
        # Widest rows first.
        return panel[np.argsort(spread)][::-1]

    average_panel = _centred_rows_by_range('average')
    slope_panel = _centred_rows_by_range('slope')
    return _multiplot(rp.dataset, variable_name, slope_panel, average_panel,
                      left_vmin = -1.0*slope_cutoff, left_vmax = slope_cutoff,
                      right_vmin = -1.0*average_cutoff, right_vmax = average_cutoff)
# Example source code using Python's argsort()
def get_local_words(preds, vocab, NEs=None, k=50):
    """
    Return the ``k`` lowest-entropy words that are not named entities.

    Given the word probabilities over many coordinates, first normalize the
    probability of each word over the different locations to get a
    probability distribution, then compute the entropy of each word's
    distribution over all coordinates and return the words that have low
    entropy and are not named entities.

    Args:
        preds: word probabilities; presumably (locations x vocabulary), since
            normalisation and entropy are both taken along axis 0 -- TODO confirm.
        vocab: sequence of words, aligned with the entries of the entropy vector.
        NEs: optional iterable of named entities to exclude (default: none).
        k: maximum number of words to return.

    Returns:
        A list of at most ``k`` words, ordered by increasing entropy.
    """
    # L1-normalise along axis 0 so the values of each column sum to one.
    normalized_preds = normalize(preds, norm='l1', axis=0)
    # `stats.entropy` works along axis 0 by default: one entropy per column.
    entropies = stats.entropy(normalized_preds)
    sorted_indices = np.argsort(entropies)
    sorted_local_words = np.array(vocab)[sorted_indices].tolist()
    # A set gives O(1) membership tests; None stands for "no named entities".
    excluded = set(NEs) if NEs is not None else set()
    filtered_local_words = [word for word in sorted_local_words
                            if word not in excluded]
    return filtered_local_words[0:k]
def cr(self):
    """Compute the composite reliability of every latent variable.

    For each latent variable the block of its measurement columns is taken
    from ``self.data_``.  A block with a single indicator gets a reliability
    of 1.  Otherwise the loadings are the first right-singular vector of the
    block's covariance matrix scaled by the square root of its largest
    singular value, and the reliability is::

        (sum |loadings|)^2 / ((sum |loadings|)^2 + (p - sum loadings^2))

    Returns:
        A DataFrame indexed by latent variable name with one column (``0``).
    """
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        name = self.latent[i]
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == name]]
        p = len(block.columns)

        if p == 1:
            composite[name] = 1
            continue

        # NOTE(review): the "p - sum(loadings**2)" term looks like it expects
        # unit-variance indicators (a correlation matrix) -- confirm that
        # self.data_ is standardised before this is called.
        cov_mat = np.cov(block.T)
        # A covariance matrix is symmetric positive semi-definite, so its
        # singular values are its eigenvalues (sorted descending); a single SVD
        # provides both the leading eigenvalue and its vector, and stays real.
        _, singular_values, vt = np.linalg.svd(cov_mat, full_matrices=False)
        loadings = vt[0, :] * np.sqrt(singular_values[0])

        numerator = np.sum(np.abs(loadings)) ** 2
        denominator = numerator + (p - np.sum(loadings ** 2))
        composite[name] = numerator / denominator

    return composite.T
def _get_sorted_channels_(self, all_keys, pattern):
sub_list = [f for f in all_keys if pattern in f]
all_channels = [int(f.split(pattern)[1]) for f in sub_list]
idx = numpy.argsort(all_channels)
return sub_list, idx
def set_streams(self, stream_mode):
    """Build the list of data sources for the requested stream mode.

    In 'single-file' mode every top-level key of the HDF5 file whose name
    contains a number is treated as one stream; streams are ordered by the
    first number found in their key, and one new instance of this class is
    created per stream with ``params['h5_key']`` set to that key.

    In 'multi-files' mode the work is delegated to ``H5File.set_streams``.

    Returns:
        The list of created sources ('single-file'), the delegate's result
        ('multi-files'), or None for any other mode.
    """
    if stream_mode == 'single-file':
        sources = []
        to_write = []
        params = self.get_description()

        # Only the key names are needed: read them and release the handle.
        with h5py.File(self.file_name) as my_file:
            all_keys = list(my_file.keys())

        # Keep each stream number next to its own key, so that keys without
        # any digit cannot shift the number -> key association.
        numbered_keys = []
        for key in all_keys:
            digits = re.findall(r'\d+', key)
            if len(digits) > 0:
                numbered_keys.append((int(digits[0]), key))
        # Stable sort on the stream number only.
        numbered_keys.sort(key=lambda pair: pair[0])

        for _, key in numbered_keys:
            params['h5_key'] = key
            new_data = type(self)(self.file_name, params)
            sources += [new_data]
            to_write += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)
        return sources
    elif stream_mode == 'multi-files':
        # NOTE(review): called on the class without passing `self`; confirm
        # whether H5File.set_streams is a static/class method.
        return H5File.set_streams(stream_mode)
def set_streams(self, stream_mode):
    """Build the list of data sources for the requested stream mode.

    In 'single-file' mode every entry of the 'recordings' group of the HDF5
    file is one stream; entry names are parsed as integers, and one new
    instance of this class is created per recording, in increasing order,
    with ``params['recording_number']`` set to that number.

    In 'multi-files' mode the work is delegated to ``H5File.set_streams``.

    Returns:
        The list of created sources ('single-file'), the delegate's result
        ('multi-files'), or None for any other mode.
    """
    if stream_mode == 'single-file':
        sources = []
        to_write = []
        params = self.get_description()

        # Only the recording numbers are needed: read them and release the
        # HDF5 handle instead of leaving it open.
        with h5py.File(self.file_name) as my_file:
            all_streams = [int(m) for m in my_file.get('recordings').keys()]

        for recording_number in sorted(all_streams):
            params['recording_number'] = recording_number
            new_data = type(self)(self.file_name, params)
            sources += [new_data]
            to_write += ['We found the datafile %s with t_start %d and duration %d' %(new_data.file_name, new_data.t_start, new_data.duration)]

        print_and_log(to_write, 'debug', logger)
        return sources
    elif stream_mode == 'multi-files':
        # NOTE(review): called on the class without passing `self`; confirm
        # whether H5File.set_streams is a static/class method.
        return H5File.set_streams(stream_mode)
def rho_estimation(data, update=None, compute_rho=True, mratio=0.01):
    """Estimate, for every point, the mean distance to its nearest neighbours.

    Args:
        data: sequence of N points (indexable, each with a length).
        update: None, or a pair ``(reference_points, previous_distances)``.
            When given, the distances from ``data[i]`` to ``update[0]`` are
            concatenated with ``update[1][i]`` before selecting neighbours.
        compute_rho: only used when ``update`` is None; if False, no
            neighbour search is done and ``rho`` stays zero.
        mratio: fraction of the points used as neighbour count (at least 5).

    Returns:
        (rho, dist, sdist, nb_selec) where ``rho`` is a float32 array of the
        mean of the ``nb_selec`` smallest distances per point, ``sdist`` maps
        a point index to those sorted distances, and ``dist`` is the full
        distance vector (``update`` is None) or the distances computed for
        the last point (``update`` given).
    """
    N = len(data)
    rho = numpy.zeros(N, dtype=numpy.float32)
    sdist = {}

    if update is None:
        # assumes distancematrix(data) returns the condensed (row-major upper
        # triangle) pairwise distance vector, as the index formula below
        # implies -- TODO confirm
        dist = distancematrix(data)
        nb_selec = max(5, int(mratio*N))

        def didx(i, j):
            # Position of the pair (i, j), with i < j, in the condensed vector.
            return i*N + j - i*(i+1)//2 - i - 1

        if compute_rho:
            for i in range(N):
                # Pairs (i, j) for j > i, then pairs (k, i) for every k < i.
                # The lower part must run over 0..i-1 inclusive; stopping at
                # i-2 would leave out the distance to point i-1.
                indices = numpy.concatenate((didx(i, numpy.arange(i+1, N)),
                                             didx(numpy.arange(0, i), i)))
                tmp = numpy.argsort(numpy.take(dist, indices))[:nb_selec]
                sdist[i] = numpy.take(dist, numpy.take(indices, tmp))
                rho[i] = numpy.mean(sdist[i])
    else:
        M = len(update[0])
        nb_selec = max(5, int(mratio*M))

        for i in range(N):
            dist = distancematrix(data[i].reshape(1, len(data[i])), update[0]).ravel()
            all_dist = numpy.concatenate((dist, update[1][i]))
            idx = numpy.argsort(all_dist)[:nb_selec]
            sdist[i] = numpy.take(all_dist, idx)
            rho[i] = numpy.mean(sdist[i])

    return rho, dist, sdist, nb_selec
def update_data_plot(self):
    """Refresh the inspection markers and redraw the data overview.

    With no inspected points the markers are emptied.  Otherwise each
    inspected point is mapped through the inverse of ``self.sort_idcs`` to
    its row, and a marker is placed at ``row + 0.5`` vertically and at
    ``2*raw_lags[-1] - raw_lags[-2]`` horizontally.
    """
    # Inverse permutation of the current sort order.
    reverse_sort = np.argsort(self.sort_idcs)

    if not len(self.inspect_points):
        self.inspect_markers.set_offsets([])
        self.inspect_markers.set_color([])
    else:
        rows = reverse_sort[np.array(sorted(self.inspect_points))]
        xs = np.ones(len(rows))*(2*self.raw_lags[-1]-self.raw_lags[-2])
        offsets = numpy.vstack((xs, rows+0.5)).T
        self.inspect_markers.set_offsets(offsets)
        self.inspect_markers.set_color(self.inspect_colors)

    self.ui.data_overview.draw_idle()
def eigenDecompose(self, X, K, normalize=True):
    """Return eigenvalues (descending) and matching eigenvectors.

    When ``X`` has at least as many columns as rows, ``K`` is decomposed
    with ``la.eigh``.  Otherwise a thin SVD of ``X`` is used: the singular
    values are zero-padded up to the number of columns of ``U``, squared,
    and (if ``normalize``) divided by the number of columns of ``X``.

    Values below zero are clipped to zero after checking that none is
    smaller than -1e-10.

    Returns:
        (s, U): eigenvalues sorted from largest to smallest, and the
        eigenvectors as the correspondingly ordered columns of ``U``.

    Raises:
        Exception: if an eigenvalue is below -1e-10.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]

    if n_cols < n_rows:
        U, s, _ = la.svd(X, check_finite=False, full_matrices=False)
        n_missing = U.shape[1] - s.shape[0]
        if n_missing > 0:
            # note: can use low-rank formulas here
            s = np.concatenate((s, np.zeros(n_missing)))
        s = s**2
        if normalize:
            s /= float(n_cols)
    else:
        s, U = la.eigh(K)

    if np.min(s) < -1e-10:
        raise Exception('Negative eigenvalues found')
    # Tiny negative values are treated as numerical noise.
    s[s < 0] = 0

    order = np.argsort(s)[::-1]
    return s[order], U[:, order]
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels (boolean or 0/1; any non-zero label counts as positive)
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")

    Returns:
        One of the values of ``y_pred``, to be used as the threshold.  With no
        positive label at all, the largest score is returned.
    """
    # Work on arrays with boolean labels: integer labels used directly as an
    # index would select rows 0/1 instead of acting as a mask.
    y = np.asarray(y, dtype=bool)
    y_pred = np.asarray(y_pred)

    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])

    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]

    # Walk down from the highest score until enough positives are covered.
    needed_positives = int(np.floor(recall * n_positive))
    positives_seen = 0
    j = 0
    for i in reversed(range(len(y_sorted))):
        if y_sorted[i]:
            positives_seen += 1
        if positives_seen >= needed_positives:
            j = i
            break
    if false_positive_margin == 0:
        return y_pred_sorted[j]

    # Keep walking down until the requested share of negatives is included;
    # if it is never reached, the lowest score (index 0) is used.
    needed_negatives = false_positive_margin * n_negative
    negatives_seen = 0
    k = 0
    for i in reversed(range(j)):
        if not y_sorted[i]:
            negatives_seen += 1
        if negatives_seen >= needed_negatives:
            k = i
            break
    return y_pred_sorted[k]
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels (boolean or 0/1; any non-zero label counts as positive)
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")

    Returns:
        One of the values of ``y_pred``, to be used as the threshold.  With no
        positive label at all, the largest score is returned.
    """
    # Work on arrays with boolean labels: integer labels used directly as an
    # index would select rows 0/1 instead of acting as a mask.
    y = np.asarray(y, dtype=bool)
    y_pred = np.asarray(y_pred)

    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])

    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]

    # Walk down from the highest score until enough positives are covered.
    needed_positives = int(np.floor(recall * n_positive))
    positives_seen = 0
    j = 0
    for i in reversed(range(len(y_sorted))):
        if y_sorted[i]:
            positives_seen += 1
        if positives_seen >= needed_positives:
            j = i
            break
    if false_positive_margin == 0:
        return y_pred_sorted[j]

    # Keep walking down until the requested share of negatives is included;
    # if it is never reached, the lowest score (index 0) is used.
    needed_negatives = false_positive_margin * n_negative
    negatives_seen = 0
    k = 0
    for i in reversed(range(j)):
        if not y_sorted[i]:
            negatives_seen += 1
        if negatives_seen >= needed_negatives:
            k = i
            break
    return y_pred_sorted[k]
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels (boolean or 0/1; any non-zero label counts as positive)
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")

    Returns:
        One of the values of ``y_pred``, to be used as the threshold.  With no
        positive label at all, the largest score is returned.
    """
    # Work on arrays with boolean labels: integer labels used directly as an
    # index would select rows 0/1 instead of acting as a mask.
    y = np.asarray(y, dtype=bool)
    y_pred = np.asarray(y_pred)

    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])

    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]

    # Walk down from the highest score until enough positives are covered.
    needed_positives = int(np.floor(recall * n_positive))
    positives_seen = 0
    j = 0
    for i in reversed(range(len(y_sorted))):
        if y_sorted[i]:
            positives_seen += 1
        if positives_seen >= needed_positives:
            j = i
            break
    if false_positive_margin == 0:
        return y_pred_sorted[j]

    # Keep walking down until the requested share of negatives is included;
    # if it is never reached, the lowest score (index 0) is used.
    needed_negatives = false_positive_margin * n_negative
    negatives_seen = 0
    k = 0
    for i in reversed(range(j)):
        if not y_sorted[i]:
            negatives_seen += 1
        if negatives_seen >= needed_negatives:
            k = i
            break
    return y_pred_sorted[k]
def threshold_from_predictions(y, y_pred, false_positive_margin=0, recall=1):
    """Determines a threshold for classifying examples as positive

    Args:
        y: labels (boolean or 0/1; any non-zero label counts as positive)
        y_pred: scores from the classifier
        recall: Threshold is set to classify at least this fraction of positive
            labelled examples as positive
        false_positive_margin: Threshold is set to achieve desired recall, and
            then is extended to include an additional fraction of negative
            labelled examples equal to false_positive_margin (This allows adding
            a buffer to the threshold while maintaining a constant "cost")

    Returns:
        One of the values of ``y_pred``, to be used as the threshold.  With no
        positive label at all, the largest score is returned.
    """
    # Work on arrays with boolean labels: integer labels used directly as an
    # index would select rows 0/1 instead of acting as a mask.
    y = np.asarray(y, dtype=bool)
    y_pred = np.asarray(y_pred)

    n_positive = np.count_nonzero(y)
    n_negative = len(y) - n_positive
    if n_positive == 0:
        return np.max(y_pred)
    if false_positive_margin == 0 and recall == 1:
        return np.min(y_pred[y])

    ind = np.argsort(y_pred)
    y_pred_sorted = y_pred[ind]
    y_sorted = y[ind]

    # Walk down from the highest score until enough positives are covered.
    needed_positives = int(np.floor(recall * n_positive))
    positives_seen = 0
    j = 0
    for i in reversed(range(len(y_sorted))):
        if y_sorted[i]:
            positives_seen += 1
        if positives_seen >= needed_positives:
            j = i
            break
    if false_positive_margin == 0:
        return y_pred_sorted[j]

    # Keep walking down until the requested share of negatives is included;
    # if it is never reached, the lowest score (index 0) is used.
    needed_negatives = false_positive_margin * n_negative
    negatives_seen = 0
    k = 0
    for i in reversed(range(j)):
        if not y_sorted[i]:
            negatives_seen += 1
        if negatives_seen >= needed_negatives:
            k = i
            break
    return y_pred_sorted[k]
def relabel_by_size(labels):
    """ Relabel clusters so they are sorted by number of members, descending.

    Args:
        labels (np.array(int)): 1-based cluster labels

    Returns:
        np.array(int): 1-based labels where label 1 is the largest cluster.
    """
    member_counts = np.bincount(labels)
    # Old labels listed from the most to the least populated.
    largest_first = np.argsort(-member_counts)
    # Sorting that permutation gives, for each old label, its size rank.
    rank_of_label = np.argsort(largest_first)
    return 1 + rank_of_label[labels]
def adjust_pvalue_bh(p):
    """ Multiple testing correction of p-values using the Benjamini-Hochberg procedure

    Args:
        p (np.array(float)): p-values, indexable with an integer array.

    Returns:
        np.array(float): adjusted values, in the same order as ``p``.
    """
    n_tests = len(p)
    # Visit the p-values from the largest to the smallest.
    largest_first = np.argsort(p)[::-1]
    # q = p * N / k where p = p-value, N = # tests, k = p-value rank
    ranks = np.arange(n_tests, 0, -1)
    scaled = (float(n_tests) / ranks) * p[largest_first]
    # Running minimum keeps the adjusted values monotone; cap them at 1.
    q = np.minimum(1, np.minimum.accumulate(scaled))
    # Return to original order
    restore_order = np.argsort(largest_first)
    return q[restore_order]
def compute_readpairs_per_umi_threshold(reads, subsample_rate):
    """Compute a threshold above which the UMIs are unlikely to be PCR off-products.

    The read counts are binomially subsampled, log-transformed and split into
    two clusters with K-Means; the threshold is the largest subsampled count
    that belongs to the cluster with the smaller centre.

    Args:
        reads (np.array(int)): Read pairs for each UMI
        subsample_rate (float): Subsample reads to this fraction.

    Returns:
        threshold (int): The RPPU threshold in the subsampled space.  1 is
        returned when there are fewer than two distinct counts, before or
        after subsampling.
    """
    # The print calls take one parenthesised argument so that they behave the
    # same as a print statement on Python 2 and as a function on Python 3.
    if len(np.unique(reads)) < 2:
        print('Skipping RPPU threshold calculation.')
        return 1

    print('RPPU subsample rate: %0.4f' % subsample_rate)

    reads = np.random.binomial(reads, subsample_rate)
    # UMIs that lost all their reads in the subsampling are dropped.
    reads = reads[reads > 0]

    if len(np.unique(reads)) < 2:
        print('Subsampling gave a degenerate distribution of RPPU. Skipping RPPU threshold calculation.')
        return 1

    new_n50 = tk_stats.NX(reads, 0.5)
    print('New N50: %d:' % new_n50)

    # Log-transform counts
    log_reads = np.log(reads)

    # Run K-Means. Reshape necessary because kmeans takes a matrix.
    # fit() already stores the assignment of every sample in `labels_`.
    kmeans = sk_cluster.KMeans(2).fit(log_reads.reshape((-1, 1)))

    # Take the cluster with the smallest mean
    min_cluster = np.argmin(np.ravel(kmeans.cluster_centers_))

    print('RPPU component means: ' + str(list(iter(np.exp(kmeans.cluster_centers_)))))
    print('RPPU component members: ' + str(np.bincount(kmeans.labels_)))

    # Take the max element in the min-cluster
    threshold = np.max(reads[kmeans.labels_ == min_cluster])

    return threshold
def rebalance(self):
    """
    Rebalances the binary heap. Takes O(n log n) time to run.
    Avoid using, when possible.

    Rows of ``self.pq_array`` are reordered by decreasing value of column 0,
    then ``self.pq_hash`` (position -> column-1 value) and ``self.exp_hash``
    (column-1 value -> position) are rebuilt for the first ``self.size`` rows.
    """
    # Sort array by priority
    priorities = self.pq_array[:, 0]
    self.pq_array = self.pq_array[np.argsort(-priorities)]

    # Create hash tables
    position_to_id = {}
    id_to_position = {}
    for position, entry_id in zip(range(self.size), self.pq_array[:, 1]):
        position_to_id[position] = entry_id
        id_to_position[entry_id] = position
    self.pq_hash = position_to_id
    self.exp_hash = id_to_position
# File source: RankOrderedAutoencoder.py
# Project: rank-ordered-autoencoder
# Author: paulbertens
# Project source
# File source
# Views: 33
# Favorites: 0
# Likes: 0
# Comments: 0
def rank_output(self):
    """Store the reversed, flattened argsort of ``self.output_raw``.

    ``self.output_raw`` is argsorted along axis 1 with a stable merge sort;
    the index array is flattened, reversed and stored as int32 in
    ``self.output_ranks``.
    """
    ascending = np.argsort(self.output_raw, axis=1, kind='mergesort')
    flattened = ascending.ravel()
    self.output_ranks = flattened[::-1].astype(np.int32)
def overlay_emojiface(probs):
    """Return the emoji image path matching the emotion probabilities.

    If the largest probability is above 0.8 the path uses that emotion
    twice; otherwise it combines the two most probable emotions, the most
    probable one first.  Emotion names are looked up in ``emotions``.
    """
    if not max(probs) > 0.8:
        # No clearly dominant emotion: combine the top two.
        first, second = np.argsort(probs)[::-1][:2]
        return 'emoji/{}-{}.png'.format(emotions[first], emotions[second])

    dominant = emotions[np.argmax(probs)]
    return 'emoji/{}-{}.png'.format(dominant, dominant)
def __call__(self, words, weights, vocabulary_max):
if len(words) < vocabulary_max * self.trigger_ratio:
return words, weights
if not isinstance(words, numpy.ndarray):
words = numpy.array(words)
# Tail optimization does not help with very large vocabularies
if len(words) > vocabulary_max * 2:
indices = numpy.argpartition(weights, len(weights) - vocabulary_max)
indices = indices[-vocabulary_max:]
words = words[indices]
weights = weights[indices]
return words, weights
# Vocabulary typically consists of these three parts:
# 1) the core - we found it's border - `core_end` - 15%
# 2) the body - 70%
# 3) the minor tail - 15%
# (1) and (3) are roughly the same size
# (3) can be safely discarded, (2) can be discarded with care,
# (1) shall never be discarded.
sorter = numpy.argsort(weights)[::-1]
weights = weights[sorter]
trend_start = int(len(weights) * 0.2)
trend_finish = int(len(weights) * 0.8)
z = numpy.polyfit(numpy.arange(trend_start, trend_finish),
numpy.log(weights[trend_start:trend_finish]),
1)
exp_z = numpy.exp(z[1] + z[0] * numpy.arange(len(weights)))
avg_error = numpy.abs(weights[trend_start:trend_finish] -
exp_z[trend_start:trend_finish]).mean()
tail_size = numpy.argmax((numpy.abs(weights - exp_z) < avg_error)[::-1])
weights = weights[:-tail_size][:vocabulary_max]
words = words[sorter[:-tail_size]][:vocabulary_max]
return words, weights