# Python np.flatnonzero() usage examples (collected snippets).
# Common imports assumed by the snippets below (each snippet comes from a
# different project; extra per-snippet dependencies such as matplotlib are
# not repeated here):
import itertools
import numpy as np

def numpy_groupby(values, keys):
""" Group a collection of numpy arrays by key arrays.
Yields (key_tuple, view_tuple) where key_tuple is the key grouped on and view_tuple is a tuple of views into the value arrays.
values: tuple of arrays to group
keys: tuple of sorted, numeric arrays to group by """
if len(values) == 0:
return
if len(values[0]) == 0:
return
for key_array in keys:
assert len(key_array) == len(keys[0])
for value_array in values:
assert len(value_array) == len(keys[0])
# The indices where any of the keys differ from the previous key become group boundaries
key_change_indices = np.logical_or.reduce(tuple(np.concatenate(([1], np.diff(key))) != 0 for key in keys))
group_starts = np.flatnonzero(key_change_indices)
group_ends = np.roll(group_starts, -1)
group_ends[-1] = len(keys[0])
for group_start, group_end in itertools.izip(group_starts, group_ends):
yield tuple(key[group_start] for key in keys), tuple(value[group_start:group_end] for value in values)
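
# Minimal usage sketch (not from the original source). numpy_groupby expects
# pre-sorted key arrays and relies on itertools.izip, i.e. Python 2 (swap in
# zip on Python 3).
example_keys = (np.array([0, 0, 1, 1, 1, 2]),)
example_values = (np.array([10., 20., 30., 40., 50., 60.]),)
groups = list(numpy_groupby(example_values, example_keys))
# Three groups, one per distinct key value:
# ((0,), (array([10., 20.]),)), ((1,), (array([30., 40., 50.]),)), ...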
def compute_clustering_accuracy(label1, label2):
"""
    From clustering_on_transcript_compatibility_counts; see the GitHub repository for the MIT license.
"""
uniq1,uniq2 = np.unique(label1),np.unique(label2)
# Create two dictionaries. Each will store the indices of each label
entries1,entries2 = {},{}
for label in uniq1: entries1[label] = set(np.flatnonzero((label1==label)))
for label in uniq2: entries2[label] = set(np.flatnonzero((label2==label)))
# Create an intersection matrix which counts the number of entries that overlap for each label combination
W = np.zeros((len(uniq1),len(uniq2)))
for i,j in itertools.product(range(len(uniq1)),range(len(uniq2))):
W[i,j]=len(entries1[uniq1[i]].intersection(entries2[uniq2[j]]))
# find the max weight matching
match_val = get_max_wt_matching(uniq1,uniq2,W)
# return the error rate
return (1-match_val/float(len(label1)))*100
def visualize(X, Y, classes, samples_per_class=10):
nb_classes = len(classes)
for y, cls in enumerate(classes):
idxs = np.flatnonzero(Y == y)
idxs = np.random.choice(idxs, samples_per_class, replace=False)
for i, idx in enumerate(idxs):
plt_idx = i * nb_classes + y + 1
plt.subplot(samples_per_class, nb_classes, plt_idx)
plt.imshow(X[idx], cmap='gray')
plt.axis('off')
if i == 0:
plt.title(cls)
#plt.show()
plt.savefig('img/data.png')
plt.clf()
def brute_radius_search(self, v, radius2=None, batchsize=1024):
v = v.ravel()
norm2 = self.get_dataset('norm2')
v_norm2 = np.sum(v * v)
candidates = []
ids = self.ids[:]
for i in xrange(0, self.num_ids, batchsize):
blockdata = self.data[i:i+batchsize, :]
dists = norm2[i:i+batchsize] + v_norm2 - 2 * np.dot(blockdata, v)
assert dists.ndim == 1
if radius2:
for j in np.flatnonzero(dists < radius2):
candidates.append((dists[j], ids[i+j]))
else:
for j in xrange(dists.shape[0]):
candidates.append((dists[j], ids[i+j]))
candidates.sort()
return candidates
def points_in_front(self, points, inverted=False, ret_indices=False):
'''
Given an array of points, return the points which lie either on the
plane or in the half-space in front of it (i.e. in the direction of
the plane normal).
points: An array of points.
inverted: When `True`, invert the logic. Return the points that lie
behind the plane instead.
ret_indices: When `True`, return the indices instead of the points
themselves.
'''
sign = self.sign(points)
if inverted:
mask = np.less_equal(sign, 0)
else:
mask = np.greater_equal(sign, 0)
indices = np.flatnonzero(mask)
return indices if ret_indices else points[indices]
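
# Standalone sketch of the same masking idea (not from the original source;
# self.sign is not shown in this snippet). For a plane through the origin
# with unit normal n, the "sign" is just the signed distance points.dot(n).
n = np.array([0., 0., 1.])
pts = np.array([[0., 0., 1.], [0., 0., -1.], [1., 2., 0.]])
signs = pts.dot(n)
in_front = pts[np.flatnonzero(signs >= 0)]   # keeps [0, 0, 1] and [1, 2, 0]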
def has_approx_support(m, m_hat, prob=0.01):
"""Returns 1 if model selection error is less than or equal to prob rate,
0 else.
NOTE: why does np.nonzero/np.flatnonzero create so much problems?
"""
m_nz = np.flatnonzero(np.triu(m, 1))
m_hat_nz = np.flatnonzero(np.triu(m_hat, 1))
upper_diagonal_mask = np.flatnonzero(np.triu(np.ones(m.shape), 1))
not_m_nz = np.setdiff1d(upper_diagonal_mask, m_nz)
intersection = np.in1d(m_hat_nz, m_nz) # true positives
not_intersection = np.in1d(m_hat_nz, not_m_nz) # false positives
true_positive_rate = 0.0
if len(m_nz):
true_positive_rate = 1. * np.sum(intersection) / len(m_nz)
true_negative_rate = 1. - true_positive_rate
false_positive_rate = 0.0
if len(not_m_nz):
false_positive_rate = 1. * np.sum(not_intersection) / len(not_m_nz)
return int(np.less_equal(true_negative_rate + false_positive_rate, prob))
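
# Minimal usage sketch (not from the original source): identical off-diagonal
# sparsity patterns give zero selection error, so the check returns 1.
m_example = np.array([[1.0, 0.5, 0.0],
                      [0.5, 1.0, 0.0],
                      [0.0, 0.0, 1.0]])
print(has_approx_support(m_example, m_example, prob=0.01))   # -> 1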
def get_clusters(C):
nonassigned = list(range(len(C)))
clusters = []
while nonassigned:
queue = set([nonassigned[0]])
clusters.append([])
while queue:
node = queue.pop()
clusters[-1].append(node)
nonassigned.remove(node)
queue.update(n for n in np.flatnonzero(C[node]) if n in nonassigned)
C = np.zeros_like(C)
for cluster in clusters:
for i in cluster:
C[i, cluster] = True
return clusters, C
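
# Minimal usage sketch (not from the original source): a symmetric boolean
# connectivity matrix with two connected components.
C_example = np.array([[1, 1, 0],
                      [1, 1, 0],
                      [0, 0, 1]], dtype=bool)
clusters, C_filled = get_clusters(C_example)
# clusters == [[0, 1], [2]]; C_filled marks full within-cluster connectivity.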
def __init__(self, geom, allowed=None):
self._coords = []
geom = geom.supercell()
dist = geom.dist(geom)
radii = np.array([get_property(sp, 'covalent_radius') for sp in geom.species])
bondmatrix = dist < 1.3*(radii[None, :]+radii[:, None])
self.fragments, C = get_clusters(bondmatrix)
radii = np.array([get_property(sp, 'vdw_radius') for sp in geom.species])
shift = 0.
C_total = C.copy()
while not C_total.all():
bondmatrix |= ~C_total & (dist < radii[None, :]+radii[:, None]+shift)
C_total = get_clusters(bondmatrix)[1]
shift += 1.
for i, j in combinations(range(len(geom)), 2):
if bondmatrix[i, j]:
bond = Bond(i, j, C=C)
self.append(bond)
for j in range(len(geom)):
for i, k in combinations(np.flatnonzero(bondmatrix[j, :]), 2):
ang = Angle(i, j, k, C=C)
if ang.eval(geom.coords) > pi/4:
self.append(ang)
for bond in self.bonds:
self.extend(get_dihedrals([bond.i, bond.j], geom.coords, bondmatrix, C))
def pinv(A, log=lambda _: None):
U, D, V = np.linalg.svd(A)
thre = 1e3
thre_log = 1e8
gaps = D[:-1]/D[1:]
try:
n = np.flatnonzero(gaps > thre)[0]
except IndexError:
n = len(gaps)
else:
gap = gaps[n]
if gap < thre_log:
log('Pseudoinverse gap of only: {:.1e}'.format(gap))
D[n+1:] = 0
D[:n+1] = 1/D[:n+1]
return U.dot(np.diag(D)).dot(V)
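
# Minimal usage sketch (not from the original source): for a well-conditioned
# square matrix there is no large singular-value gap, nothing is truncated,
# and the result matches the ordinary inverse.
A_example = np.diag([4.0, 2.0, 1.0])
print(np.allclose(pinv(A_example), np.linalg.inv(A_example)))   # -> True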
def inverse(self, encoded, duration=None):
'''Inverse static tag transformation'''
ann = jams.Annotation(namespace=self.namespace, duration=duration)
if np.isrealobj(encoded):
detected = (encoded >= 0.5)
else:
detected = encoded
for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]:
vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd)))
ann.append(time=0,
duration=duration,
value=vd,
confidence=encoded[vid])
return ann
def quant2ind(U):
"""
Use quantization matrix to return slices of non-zero elements.
:param U: quantization matrix
:return: list of `slice`s for non-zero elements of U
.. note:: This function assumes that the pulsar TOAs were sorted by time.
"""
inds = []
for cc, col in enumerate(U.T):
epinds = np.flatnonzero(col)
if epinds[-1] - epinds[0] + 1 != len(epinds):
raise ValueError('ERROR: TOAs not sorted properly!')
inds.append(slice(epinds[0], epinds[-1]+1))
return inds
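
# Minimal usage sketch (not from the original source): each column of U flags
# one contiguous block of time-sorted TOAs.
U_example = np.array([[1, 0],
                      [1, 0],
                      [0, 1]])
print(quant2ind(U_example))   # -> [slice(0, 2, None), slice(2, 3, None)]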
def find(a,n=None,d=None,nargout=1):
if d:
raise NotImplementedError
    # There is no guarantee that nonzero or flatnonzero will index the
    # argument without first converting it to an array, so we use asarray
    # instead of asanyarray.
if nargout == 1:
i = np.flatnonzero(np.asarray(a)).reshape(1,-1)+1
if n is not None:
i = i.take(n)
return matlabarray(i)
if nargout == 2:
i,j = np.nonzero(np.asarray(a))
if n is not None:
i = i.take(n)
j = j.take(n)
return (matlabarray((i+1).reshape(-1,1)),
matlabarray((j+1).reshape(-1,1)))
raise NotImplementedError
def error_contour(self, phi, epsilon = 2.0, delta_phi = None):
"""Error between worm shape and contour from image given implicitly by phi
Arguments:
phi (array): implicit worm contour, if none calculated from actual shape
delta_phi (number): range of phi values to consider for error estimate
epsilon (number): refularization parameter for the step functions
curvature (bool): error due to curvature on phi
"""
phi2 = self.phi(size = phi.shape);
if delta_phi is None:
error = np.sum((np.tanh(phi / epsilon)-np.tanh(phi2 / epsilon))**2);
else:
idx = np.flatnonzero(np.logical_and(phi2 <= delta_phi, phi2 >= -delta_phi))
error =np.sum((np.tanh(phi[idx] / epsilon)-np.tanh(phi2[idx] / epsilon))**2);
return error;
def compute_EER(Pfa, Pmiss):
""" computes the equal error rate (EER) given
Pmiss or False Negative Rate
and
Pfa or False Positive Rate
calculated for a range of operating points on the DET curve
@Author: "Timothee Kheyrkhah, Omid Sadjadi"
"""
fpr = Pfa
fnr = Pmiss
diff_pm_fa = fnr - fpr
x1 = np.flatnonzero(diff_pm_fa >= 0)[0]
x2 = np.flatnonzero(diff_pm_fa < 0)[-1]
a = (fnr[x1] - fpr[x1]) / (fpr[x2] - fpr[x1] - (fnr[x2] - fnr[x1]))
return fnr[x1] + a * (fnr[x2] - fnr[x1])
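
# Minimal usage sketch (not from the original source): a toy DET curve whose
# false-positive and false-negative rates cross at 0.25.
Pfa_example = np.array([0.4, 0.3, 0.2, 0.1])
Pmiss_example = np.array([0.1, 0.2, 0.3, 0.4])
print(compute_EER(Pfa_example, Pmiss_example))   # -> 0.25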
def visual_data(self):
print "visualized some cifar-10 picture..."
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
num_classes = len(classes)
samples_per_class = 7
for y, cl in enumerate(classes):
idxs = np.flatnonzero(self.y_train == y)
idxs = np.random.choice(idxs, samples_per_class, replace=False)
for i, idx in enumerate(idxs):
plt_idx = i * num_classes + y + 1
plt.subplot(samples_per_class, num_classes, plt_idx)
plt.imshow(self.X_train[idx].astype('uint8'))
plt.axis('off')
if i == 0:
plt.title(cl)
plt.show()
print "visualized data done...\n---------\n"
#
def imcrop_tosquare(img):
"""Make any image a square image.
Parameters
----------
img : np.ndarray
Input image to crop, assumed at least 2d.
Returns
-------
crop : np.ndarray
Cropped image.
"""
size = np.min(img.shape[:2])
extra = img.shape[:2] - size
crop = img
for i in np.flatnonzero(extra):
crop = np.take(crop, extra[i] // 2 + np.r_[:size], axis=i)
return crop
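
# Minimal usage sketch (not from the original source): a 4x6 image is cropped
# symmetrically along its longer axis down to 4x4.
img_example = np.arange(24).reshape(4, 6)
print(imcrop_tosquare(img_example).shape)   # -> (4, 4)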
def boundary_nodes(fs):
"""Find the list of boundary nodes in fs. This is a
unit-square-specific solution. A more elegant solution would employ
the mesh topology and numbering.
"""
eps = 1.e-10
f = Function(fs)
def on_boundary(x):
"""Return 1 if on the boundary, 0. otherwise."""
if x[0] < eps or x[0] > 1 - eps or x[1] < eps or x[1] > 1 - eps:
return 1.
else:
return 0.
f.interpolate(on_boundary)
return np.flatnonzero(f.values)
def computeEndToEnd(endtoendDistances, polWeights):
# Initiate variables
weightedEndtoendSq=np.zeros(c.nBeads)
weightedEndtoendSqStd=np.zeros(c.nBeads)
# Loop over all possible polymer lengths
for z in range(c.nBeads):
# Get data
t1 = np.squeeze(np.asarray(endtoendDistances)[:,z])
t2 = np.squeeze(np.asarray(polWeights)[:,z]);
        # Count non-zero weights
        dataLength = len( np.flatnonzero(t2!=0) );
        # Calculate mean and standard deviation of the mean
weightedEndtoendSq[z]=np.average(t1, weights=t2)
weightedEndtoendSqStd[z]=( (np.average((t1 - weightedEndtoendSq[z])**2, weights=t2)) / (dataLength))**(1/2)
return weightedEndtoendSq, weightedEndtoendSqStd
# Calculate gyradius and errors
def computeGyradiusStd(gyradiusSq, polWeights):
# Calc Weighted gyradius and standard deviation
# Initiate variables
weightedGyradiusSq=np.zeros(c.nBeads)
weightedGyradiusSqStd=np.zeros(c.nBeads)
# Loop over all possible polymer lengths
for z in range(c.nBeads):
# Get weight for bead number z in all polymers
w = np.squeeze(np.asarray(polWeights)[:,z])
# Count nonzero
dataLength = len( np.flatnonzero(w!=0) )
        # Calculate mean and standard deviation of the mean
weightedGyradiusSq[z]=np.average(gyradiusSq[:,z], weights=w)
weightedGyradiusSqStd[z]=( (np.average((gyradiusSq[:,z] - weightedGyradiusSq[z])**2, weights=w)) / (dataLength))**(1/2)
return weightedGyradiusSq, weightedGyradiusSqStd
def set_dropout(self):
n_nodes = self.n_nodes
dropout=np.random.binomial([np.ones(sum(n_nodes))],
self.DORATE)[0].astype(np.bool)
dropout[0:n_nodes[0]]=False # no dropout units in first layer
dropout[-n_nodes[-1]:]=False # no dropout units in last layer
self.dropout = dropout
    # ths_d: boolean mask marking which weight entries are disabled by dropout
self.ths_d = np.zeros(len(self.ths)).astype(np.bool)
for l in range(len(self.ths_l)):
if (l<len(self.ths_l)-1):
            # mark weights whose input node is dropped out
for i in np.flatnonzero(self.__get_dropout(l)):
self.ths_d[self.ths_l[l]+\
                # the +1 skips the bias weight at the start of each row
np.arange(n_nodes[l+1])*(n_nodes[l]+1)+(i+1)] = True
        # mark weights whose output node is dropped out
for i in np.flatnonzero(self.__get_dropout(l+1)):
self.ths_d[self.ths_l[l]+i*(n_nodes[l]+1):\
self.ths_l[l]+(i+1)*(n_nodes[l]+1)] = True
def mapToNewPos(curposIDs, bigtosmall):
''' Convert list of old ids to new positions after bigtosmall reordering.
Example
-------
>>> curposIDs = [0, 2, 4]
>>> N = [11, 9, 3, 1, 5]
>>> bigtosmall = argsort_bigtosmall_stable(N)
>>> print bigtosmall
[0 1 4 2 3]
>>> newposIDs = mapToNewPos(curposIDs, bigtosmall)
>>> print newposIDs
[0, 3, 2]
'''
newposIDs = np.zeros_like(curposIDs)
for posID in range(len(curposIDs)):
newposIDs[posID] = np.flatnonzero(bigtosmall == curposIDs[posID])[0]
return newposIDs.tolist()
def findMergePairByUID(self, uidA, uidB):
''' Find which currently tracked merge pair contains desired uids.
Returns
-------
rowID : int
index of tracked merge quantities related to specific uid pair.
'''
assert hasattr(self, 'mUIDPairs')
rowID = np.flatnonzero(
np.logical_and(uidA == self.mUIDPairs[:, 0],
uidB == self.mUIDPairs[:, 1]))
if not rowID.size == 1:
raise ValueError(
'Bad search for correct merge UID pair.\n' + str(rowID))
rowID = rowID[0]
return rowID
def _discardAnyTrackedPairsThatOverlapWithAorB(self, uidA, uidB):
''' Update to discard remaining pairs that overlap uidA/uidB.
Post Condition
--------------
Attributes mUIDPairs and _MergeTerms dont have any more info
about other pairs (uidj,uidk) where where uidA or uidB are involved.
'''
if hasattr(self, 'mUIDPairs'):
mUIDPairs = self.mUIDPairs
# Remove any other pairs associated with kA or kB
keepRowIDs = ((mUIDPairs[:, 0] != uidA) *
(mUIDPairs[:, 1] != uidA) *
(mUIDPairs[:, 0] != uidB) *
(mUIDPairs[:, 1] != uidB))
keepRowIDs = np.flatnonzero(keepRowIDs)
self.setMergeUIDPairs(mUIDPairs[keepRowIDs])
# Remove any other pairs related to kA, kB
if self.hasMergeTerms():
for key, dims in self._MergeTerms._FieldDims.items():
mArr = getattr(self._MergeTerms, key)
if dims[0] == 'M':
mArr = mArr[keepRowIDs]
self._MergeTerms.setField(key, mArr, dims=dims)
def makePlanForEmptyComps(curSS, dtargetMinCount=0.01, **kwargs):
""" Create a Plan dict for any empty states.
Returns
-------
Plan : dict with either no fields, or two fields named
* candidateIDs
* candidateUIDs
Any "empty" Plan dict indicates that no empty comps exist.
"""
Nvec = curSS.getCountVec()
emptyIDs = np.flatnonzero(Nvec < dtargetMinCount)
if len(emptyIDs) == 0:
return dict()
Plan = dict(candidateIDs=emptyIDs.tolist(),
candidateUIDs=curSS.uIDs[emptyIDs].tolist(),
)
return Plan
def argsortBigToSmallByTiers(tierAScores, tierBScores):
    ''' Perform a big-to-small argsort by the first array, breaking ties with the second.
Example
-------
>>> AScores = [1, 1, 1, 0, 0]
>>> BScores = [6, 5, 4, 8, 7]
>>> print argsortBigToSmallByTiers(AScores, BScores)
[0 1 2 3 4]
>>> AScores = [1, 1, 1, 0, 0, -1, -1, -1]
>>> BScores = [6, 5, 4, 8, 7, 11, 1.5, -1.5]
>>> print argsortBigToSmallByTiers(AScores, BScores)
[0 1 2 3 4 5 6 7]
>>> print argsortBigToSmallByTiers(BScores, AScores)
[5 3 4 0 1 2 6 7]
'''
tierAScores = np.asarray(tierAScores, dtype=np.float64)
sortids = argsort_bigtosmall_stable(tierAScores)
limits = np.flatnonzero(np.diff(tierAScores[sortids]) != 0) + 1
sortids = sortids[
argsort_bigtosmall_stable(tierBScores, limits=limits)]
return sortids
def plot_sequence(seqID, Data, dimID=0, maxT=200):
Xseq = Data.X[Data.doc_range[seqID]:Data.doc_range[seqID + 1]]
Zseq = Data.TrueParams['Z'][
Data.doc_range[seqID]:Data.doc_range[
seqID +
1]]
Xseq = Xseq[:maxT, dimID] # Xseq is 1D after this statement!
Zseq = Zseq[:maxT]
# Plot X, colored by segments Z
changePts = np.flatnonzero(np.abs(np.diff(Zseq)))
changePts = np.hstack([0, changePts + 1])
for ii, loc in enumerate(changePts[:-1]):
nextloc = changePts[ii + 1]
ts = np.arange(loc, nextloc)
xseg = Xseq[loc:nextloc]
kseg = int(Zseq[loc])
color = GaussViz.Colors[kseg % len(GaussViz.Colors)]
pylab.plot(ts, xseg, '.-', color=color, markersize=8)
pylab.plot(
[nextloc - 1, nextloc], [Xseq[nextloc - 1], Xseq[nextloc]], 'k:')
pylab.ylim([-2, 14])
def calc_underprediction_scores_per_word(model, Data, LP=None, **kwargs):
''' Find scalar score for each vocab word. Larger => worse prediction.
'''
if LP is None:
LP = model.calc_local_params(Data)
DocWordFreq_emp = calcWordFreqPerDoc_empirical(Data)
DocWordFreq_model = calcWordFreqPerDoc_model(model, LP)
uError = np.maximum(DocWordFreq_emp - DocWordFreq_model, 0)
# For each word, identify set of relevant documents
DocWordMat = Data.to_sparse_docword_matrix().toarray()
score = np.zeros(Data.vocab_size)
# TODO: only consider words with many docs overall
for vID in xrange(Data.vocab_size):
countPerDoc = DocWordMat[:, vID]
typicalWordCount = np.median(countPerDoc[countPerDoc > 0])
candidateDocs = np.flatnonzero(countPerDoc > typicalWordCount)
if len(candidateDocs) < 10:
continue
score[vID] = np.mean(uError[candidateDocs, vID])
# Only give positive probability to words with above average score
score = score - np.mean(score)
score = np.maximum(score, 0)
score = score * score # make more peaked!
score /= score.sum()
return score
def _sample_target_GroupXData(Data, model, LP, **kwargs):
''' Draw sample subset of provided GroupXData dataset
'''
randstate = kwargs['randstate']
if not hasValidKey('targetCompID', kwargs):
raise NotImplementedError('TODO')
ktarget = kwargs['targetCompID']
targetProbThr = kwargs['targetCompFrac']
mask = LP['resp'][:, ktarget] > targetProbThr
objIDs = np.flatnonzero(mask)
if len(objIDs) < 2:
return None, dict()
randstate.shuffle(objIDs)
targetObjIDs = objIDs[:kwargs['targetMaxSize']]
TargetData = Data.select_subset_by_mask(atomMask=targetObjIDs,
doTrackFullSize=False)
TargetInfo = dict(ktarget=ktarget)
return TargetData, TargetInfo
def removeEmptyComps_SSandLP(self, SS, LP):
''' Remove all parameters related to empty components from SS and LP
Returns
--------
SS : bnpy SuffStatBag
LP : dict for local params
'''
badks = np.flatnonzero(SS.N[:-1] < 1)
# Remove in order, from largest index to smallest
for k in badks[::-1]:
SS.removeComp(k)
mask = LP['Z'] > k
LP['Z'][mask] -= 1
if 'resp' in LP:
del LP['resp']
return SS, LP