def onehot(self, data, min_length=None):
    if min_length is None:
        min_length = self.vocab_size
    # Count occurrences of each token id up to the vocabulary size.
    return np.bincount(data, minlength=min_length)
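A minimal usage sketch of the counting step (the surrounding class, which is assumed to expose a `vocab_size` attribute, is not shown here):

import numpy as np

data = np.array([0, 1, 1, 3])
vocab_size = 5  # stands in for self.vocab_size
print(np.bincount(data, minlength=vocab_size))  # -> [1 2 0 1 0]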
def test_sample_from_probs2_gof(size):
set_random_seed(size)
probs = np.exp(2 * np.random.random(size)).astype(np.float32)
num_samples = 2000 * size
probs2 = np.tile(probs, (num_samples, 1))
samples = sample_from_probs2(probs2)
probs /= probs.sum() # Normalize afterwards.
counts = np.bincount(samples, minlength=size)
print(counts)
print(probs * num_samples)
gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
    assert 1e-2 < gof  # fail only if the sampler's fit is implausibly poor
def count_pairs(assignments, v1, v2, M):
"""Construct sufficient statistics for (v1, v2) pairs.
Args:
assignments: An _ x V assignment matrix with values in range(M).
v1, v2: Column ids of the assignments matrix.
M: The number of possible assignment bins.
Returns:
An M x M array of counts.
"""
assert v1 != v2
pairs = assignments[:, v1].astype(np.int32) * M + assignments[:, v2]
return np.bincount(pairs, minlength=M * M).reshape((M, M))
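A quick check of the pair-counting construction, using count_pairs as defined above (values hypothetical):

import numpy as np

assignments = np.array([[0, 1], [0, 1], [1, 0]])
print(count_pairs(assignments, 0, 1, M=2))
# -> [[0 2]
#     [1 0]]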
def _fast_hist(self, label_true, label_pred, n_class):
    # Build a confusion matrix in one bincount: flat index = n_class * true + pred.
    # Pixels whose true label falls outside [0, n_class) are ignored.
    mask = (label_true >= 0) & (label_true < n_class)
hist = np.bincount(
n_class * label_true[mask].astype(int) +
label_pred[mask], minlength=n_class**2).reshape(n_class, n_class)
return hist
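The flat-index trick above is the standard fast confusion matrix for semantic segmentation; a small standalone check (sketch):

import numpy as np

y_true = np.array([0, 0, 1, 2])
y_pred = np.array([0, 1, 1, 2])
hist = np.bincount(3 * y_true + y_pred, minlength=9).reshape(3, 3)
print(hist)  # rows: true class, columns: predicted class; diagonal = correct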
def relabel_by_size(labels):
    """Relabel clusters so they are sorted by number of members, descending.

    Args:
        labels (np.array(int)): 1-based cluster labels
    """
    order = np.argsort(np.argsort(-np.bincount(labels)))
    return 1 + order[labels]
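A worked example of the double-argsort ranking, using relabel_by_size as defined above:

import numpy as np

labels = np.array([1, 2, 2, 2, 3, 3])  # cluster 2 is largest, then 3, then 1
print(relabel_by_size(labels))  # -> [3 1 1 1 2 2]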
def get_cluster_sizes(clustering):
    """ Returns a numpy array containing cell-counts for each cluster """
    # Labels are 1-based, so drop the count for the unused bin 0.
    return np.bincount(clustering.clusters)[1:]
def add_many(self, elems):
    self.active = True
    elems = np.copy(elems).astype(np.int_)
    # Clamp out-of-range values into a single overflow bin at max_value + 1.
    elems[elems > self.max_value] = 1 + self.max_value
    self.counts += np.bincount(elems, minlength=len(self.counts))
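A standalone sketch of the clamp-and-count logic (hypothetical counter with max_value=3, so counts has one extra overflow bin):

import numpy as np

max_value = 3
counts = np.zeros(max_value + 2, dtype=np.int_)
elems = np.array([0, 2, 7, 9]).copy()
elems[elems > max_value] = 1 + max_value  # 7 and 9 fall into the overflow bin
counts += np.bincount(elems, minlength=len(counts))
print(counts)  # -> [1 0 1 0 2]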
def get_cdna_mol_counts_per_gene(self, gene_index, remove_none_gene=True):
    mol_genes = self.get_column('gene')
    num_genes = len(gene_index.get_genes())
    # minlength=num_genes + 1 leaves room for an extra bin, presumably for
    # molecules not assigned to any gene; it is trimmed off below.
    gene_counts = np.bincount(mol_genes, minlength=num_genes + 1)
    if remove_none_gene:
        gene_counts = gene_counts[:num_genes]
    return gene_counts
From decision_tree_submit.py, project Python-Machine-Learning-By-Example (author: PacktPublishing):
def get_leaf(labels):
# Obtain the leaf as the majority of the labels
return np.bincount(labels).argmax()
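Note that np.bincount(...).argmax() breaks ties toward the smaller label, since argmax returns the first maximum:

import numpy as np

print(np.bincount(np.array([0, 0, 1, 1, 2])).argmax())  # tie between 0 and 1 -> 0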
def compute_class_frequencies(segment, num_classes):
    if isinstance(segment, list):
        segment = np.asarray(segment)
    f = 1.0 * np.bincount(segment.reshape(-1).astype(int), minlength=num_classes) / np.prod(segment.shape)
    return f
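A quick numeric check, using compute_class_frequencies as defined above:

import numpy as np

seg = np.array([[0, 0], [1, 2]])
print(compute_class_frequencies(seg, num_classes=4))  # -> [0.5 0.25 0.25 0.]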
def compute_centralvoxel_frequencies(segment, minlength):
    if isinstance(segment, list):
        segment = np.asarray(segment)
    shape = segment.shape[-3:]
    # Locate the central voxel of the 3D patch (biased low on even-sized dims).
    middle_coordinate = np.zeros(3, int)
    for it_coordinate, coordinate in enumerate(shape):
        if coordinate % 2 == 0:
            middle_coordinate[it_coordinate] = coordinate // 2 - 1
        else:
            middle_coordinate[it_coordinate] = coordinate // 2
    segment = segment.reshape((-1,) + shape)
    central_voxels = segment[:, middle_coordinate[0], middle_coordinate[1], middle_coordinate[2]]
    f = 1.0 * np.bincount(central_voxels.reshape(-1).astype(int), minlength=minlength) / np.prod(segment.shape[:-3])
    return f
def get_class_distribution(self, subject_list):
class_frequencies = np.zeros(self.n_classes)
for subj in subject_list:
labels = subj.load_labels()
mask = subj.load_ROI_mask()
class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten(),
minlength=self.n_classes)
return class_frequencies
def get_class_weights(self, subject_list, mask_bool=True):
    class_frequencies = np.zeros(self.n_classes)
    for subj in subject_list:
        labels = subj.load_labels()
        if mask_bool == 'ROI':
            mask = subj.load_ROI_mask()
            class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
                                             minlength=self.n_classes)
        elif mask_bool == 'labels':
            # Count only voxels with a non-background label.
            mask = np.zeros_like(labels)
            mask[labels > 0] = 1
            class_frequencies += np.bincount(labels.flatten().astype('int'), weights=mask.flatten().astype('int'),
                                             minlength=self.n_classes + 1)[1:]
        else:
            # Any other value of mask_bool (including the default True): no mask.
            class_frequencies += np.bincount(labels.flatten().astype('int'),
                                             minlength=self.n_classes)
    class_frequencies = class_frequencies / np.sum(class_frequencies)
    # Weight each class inversely to its frequency, scaled by a mid-ranked frequency.
    class_weight = np.sort(class_frequencies)[int(np.ceil(1.0 * self.n_classes / 2))] / class_frequencies
    class_weight[np.where(class_frequencies == 0)[0]] = 0  # avoid infinite weights
    return class_weight
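A numeric sketch of the final weighting step (frequencies hypothetical; note the reference frequency sits at index ceil(n_classes / 2) of the sorted array, which for 3 classes is the largest frequency):

import numpy as np

n_classes = 3
freq = np.array([0.7, 0.2, 0.1])
ref = np.sort(freq)[int(np.ceil(1.0 * n_classes / 2))]  # sorted[2] = 0.7
print(ref / freq)  # -> [1. 3.5 7.]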
def epoch_voting(Y, chunk_size):
    Y_new = Y.copy()
    # Replace each chunk with its majority label (integer division for Python 3).
    for i in range(1 + len(Y_new) // chunk_size):
        epoch = Y_new[i * chunk_size:(i + 1) * chunk_size]
        if len(epoch) != 0:
            winner = np.bincount(epoch).argmax()
            Y_new[i * chunk_size:(i + 1) * chunk_size] = winner
    return Y_new
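A short usage sketch of the chunk-wise majority vote, using epoch_voting as defined above:

import numpy as np

Y = np.array([0, 0, 1, 1, 1, 1, 2, 1])
print(epoch_voting(Y, chunk_size=4))  # -> [0 0 0 0 1 1 1 1]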
def est_pmf(self, samples, normalize=True, eps=1e-10):
    """Estimate a probability mass function from samples

    :param np.ndarray samples: `(n_samples, len(self.nsoutdims))`
        array of samples
    :param bool normalize: True: Return normalized probability
        estimates (default). False: Return integer outcome counts.
    :returns: Estimated probabilities as ndarray `est_pmf` with
        shape `self.nsoutdims`

    `n_samples * est_pmf[i1, ..., ik]` gives the number of
    occurrences of outcome `(i1, ..., ik)` in `samples`.
    """
n_samples = samples.shape[0]
n_out = np.prod(self.nsoutdims)
if samples.ndim > 1:
samples = self.pack_samples(samples)
counts = np.bincount(samples, minlength=n_out)
assert counts.shape == (n_out,)
counts = counts.reshape(self.nsoutdims)
assert counts.sum() == n_samples
if normalize:
return counts / n_samples
else:
return counts
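The core of the estimator is a bincount over packed outcome indices; a minimal standalone sketch with two binary outcome dimensions (pack_samples is approximated here by np.ravel_multi_index):

import numpy as np

nsoutdims = (2, 2)
samples = np.array([[0, 0], [0, 1], [0, 1], [1, 1]])
flat = np.ravel_multi_index(samples.T, nsoutdims)  # pack (i1, i2) into a flat index
counts = np.bincount(flat, minlength=4).reshape(nsoutdims)
print(counts / samples.shape[0])  # -> [[0.25 0.5 ] [0.   0.25]]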
def fit(self, data):
"""
Run K-Means on data n_init times.
Parameters
----------
data: numpy array
Returns
-------
No value is returned.
Function sets the following two object params:
self.labels_
self.cluster_centers_
"""
data = np.array(data)
labels, cluster_centers = [], []
for i in range(self.n_init):
if not self.warm_start:
self.cluster_centers_ = None
self._global_covar_matrices = None
self._inv_covar_matrices = None
self._fit(data)
labels += [self.labels_]
cluster_centers += [self.cluster_centers_]
self.inertias_ += [self._inertia(data)]
self.log_likelihoods_ += [self.log_likelihood(data)]
    # Keep the run with the lowest inertia.
    best_idx = np.argmin(self.inertias_)
self.labels_ = labels[best_idx]
self.all_labels_ = labels
self.best_log_likelihood_ = self.log_likelihoods_[best_idx]
self.best_inertia_ = self.inertias_[best_idx]
self.cluster_centers_ = cluster_centers[best_idx]
    if self.verbose == 1:
        print('fit: n_clusters: {}, label bin count: {}'.format(
            self.n_clusters, np.bincount(self.labels_, minlength=self.n_clusters)))
def _document_frequency(X):
"""Count the number of non-zero values for each feature in sparse X."""
if sp.isspmatrix_csr(X):
return np.bincount(X.indices, minlength=X.shape[1])
    else:
        # Non-CSR input: convert to CSC and diff the column pointer array.
        return np.diff(sp.csc_matrix(X, copy=False).indptr)
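A quick check of the CSR branch (sketch; sp is scipy.sparse, as in the original module):

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1, 0, 2],
                            [0, 0, 3],
                            [4, 0, 0]]))
print(np.bincount(X.indices, minlength=X.shape[1]))  # -> [2 0 2]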