def gaus_posterior(dim, std0):
"""Initialise a posterior Gaussian distribution with a diagonal covariance.
Even though this is initialised with a diagonal covariance, a full
covariance will be learned, using a lower triangular Cholesky
parameterisation.
Parameters
----------
dim : tuple or list
the dimension of this distribution.
std0 : float
the initial (unoptimized) diagonal standard deviation of this
distribution.
Returns
-------
Q : tf.contrib.distributions.MultivariateNormalTriL
the initialised posterior Gaussian object.
Note
----
This will make tf.Variables on the randomly initialised mean and covariance
of the posterior. The initialisation of the mean is from a Normal with zero
mean, and ``std0`` standard deviation, and the initialisation of the (lower
triangular of the) covariance is from a gamma distribution with an alpha of
``std0`` and a beta of 1.
"""
o, i = dim
# Optimize only values in lower triangular
u, v = np.tril_indices(i)
indices = (u * i + v)[:, np.newaxis]
l0 = np.tile(np.eye(i), [o, 1, 1])[:, u, v].T
l0 = l0 * tf.random_gamma(alpha=std0, shape=l0.shape, seed=next(seedgen))
lflat = tf.Variable(l0, name="W_cov_q")
Lt = tf.transpose(tf.scatter_nd(indices, lflat, shape=(i * i, o)))
L = tf.reshape(Lt, (o, i, i))
mu_0 = tf.random_normal((o, i), stddev=std0, seed=next(seedgen))
mu = tf.Variable(mu_0, name="W_mu_q")
Q = MultivariateNormalTriL(mu, L)
return Q
#
# KL divergence calculations
#
python类tril_indices()的实例源码
def impute_missing_bins(hic_matrix, regions=None, per_chromosome=True, stat=np.ma.mean):
"""
Impute missing contacts in a Hi-C matrix.
For inter-chromosomal data uses the mean of all inter-chromosomal contacts,
for intra-chromosomal data uses the mean of intra-chromosomal counts at the corresponding diagonal.
:param hic_matrix: A square numpy array
:param regions: A list of :class:`~GenomicRegion`s - if omitted, will create a dummy list
:param per_chromosome: Do imputation on a per-chromosome basis (recommended)
:param stat: The aggregation statistic to be used for imputation, defaults to the mean.
"""
if regions is None:
for i in range(hic_matrix.shape[0]):
regions.append(GenomicRegion(chromosome='', start=i, end=i))
chr_bins = dict()
for i, region in enumerate(regions):
if region.chromosome not in chr_bins:
chr_bins[region.chromosome] = [i, i]
else:
chr_bins[region.chromosome][1] = i
n = len(regions)
if not hasattr(hic_matrix, "mask"):
hic_matrix = masked_matrix(hic_matrix)
imputed = hic_matrix.copy()
if per_chromosome:
for c_start, c_end in chr_bins.itervalues():
# Correcting intrachromoc_startmal contacts by mean contact count at each diagonal
for i in range(c_end - c_start):
ind = kth_diag_indices(c_end - c_start, -i)
diag = imputed[c_start:c_end, c_start:c_end][ind]
diag[diag.mask] = stat(diag)
imputed[c_start:c_end, c_start:c_end][ind] = diag
# Correcting interchromoc_startmal contacts by mean of all contact counts between
# each set of chromoc_startmes
for other_start, other_end in chr_bins.itervalues():
# Only correct upper triangle
if other_start <= c_start:
continue
inter = imputed[c_start:c_end, other_start:other_end]
inter[inter.mask] = stat(inter)
imputed[c_start:c_end, other_start:other_end] = inter
else:
for i in range(n):
diag = imputed[kth_diag_indices(n, -i)]
diag[diag.mask] = stat(diag)
imputed[kth_diag_indices(n, -i)] = diag
# Copying upper triangle to lower triangle
imputed[np.tril_indices(n)] = imputed.T[np.tril_indices(n)]
return imputed
def _plot(self, region=None, cax=None):
if region is None:
raise ValueError("Cannot plot triangle plot for whole genome.")
hm, sr = sub_matrix_regions(self.hic_matrix, self.regions, region)
hm[np.tril_indices(hm.shape[0])] = np.nan
# Remove part of matrix further away than max_dist
if self.max_dist is not None:
for i in range(hm.shape[0]):
i_region = sr[i]
for j in range(hm.shape[1]):
j_region = sr[j]
if j_region.start-i_region.end > self.max_dist:
hm[i, j] = np.nan
hm_masked = np.ma.MaskedArray(hm, mask=np.isnan(hm))
# prepare an array of the corner coordinates of the Hic-matrix
# Distances have to be scaled by sqrt(2), because the diagonals of the bins
# are sqrt(2)*len(bin_size)
sqrt2 = math.sqrt(2)
bin_coords = np.r_[[(x.start - 1) for x in sr], sr[-1].end]/sqrt2
X, Y = np.meshgrid(bin_coords, bin_coords)
# rotatate coordinate matrix 45 degrees
sin45 = math.sin(math.radians(45))
X_, Y_ = X*sin45 + Y*sin45, X*sin45 - Y*sin45
# shift x coords to correct start coordinate and center the diagonal directly on the
# x-axis
X_ -= X_[1, 0] - (sr[0].start - 1)
Y_ -= .5*np.min(Y_) + .5*np.max(Y_)
# create plot
self.hicmesh = self.ax.pcolormesh(X_, Y_, hm_masked, cmap=self.colormap, norm=self.norm)
# set limits and aspect ratio
#self.ax.set_aspect(aspect="equal")
ylim_max = 0.5*(region.end-region.start)
if self.max_dist is not None and self.max_dist/2 < ylim_max:
ylim_max = self.max_dist/2
self.ax.set_ylim(0, ylim_max)
# remove y ticks
self.ax.set_yticks([])
# hide background patch
self.ax.patch.set_visible(False)
if self.show_colorbar:
self.add_colorbar(cax)
def mixtureMV(self, mu, sig, ps):
"""LearningRules.mixtureMV
Sample from a multivariate gaussian mixture with \mu = means, \Sigma = cov matrix
"""
# print "%s.mixtureMV: Implement me" % (self.__class__.__name__)
# print "mixtureMV", "mu", mu.shape, "sig", sig.shape, "ps", ps.shape
mixcomps = self.mixcomps
d = self.dim
assert mixcomps == ps.shape[0]
assert not np.isnan(mu).any()
assert not np.isnan(sig).any()
assert not np.isnan(ps).any()
# check flat inputs
if mu.shape[1] == 1:
mu = mu.reshape((self.mixcomps, self.dim))
# mu_ = mu.reshape((mixcomps, d))
if sig.shape[1] == 1:
# S_diag = sig[:(mixcomps * self.dim)]
# S_triu = sig[(mixcomps * self.dim):]
S_diag = sig[:(mixcomps * d)].reshape((mixcomps, d))
S_triu = sig[(mixcomps * d):].reshape((mixcomps, -1))
S = []
for i in range(mixcomps):
S_ = np.diag(S_diag[i])
S_[np.triu_indices(d, 1)] = S_triu[i]
S_[np.tril_indices(d, -1)] = S_triu[i] # check if thats correct
S.append(S_)
sig = np.array(S)
# print "mixtureMV", "mu", mu.shape, "sig", sig.shape, "ps", ps.shape
# d = mu.shape[0]
compidx = self.mixture_sample_prior(ps)
mu_ = mu[compidx]
S_ = sig[compidx]
# print "compidx", compidx, "mu_", mu_, "S_", S_
y = np.random.multivariate_normal(mu_, S_)
return y
# mixture, mdn_loss, and softmax are taken from karpathy's MixtureDensityNets.py
def test_mixtureMV(args):
"""test_mixtureMV
Test mixtureMV() by sampling from dummy statistics mu, S, p
"""
mixcomps = 3
d = 2
mu = np.random.uniform(-1, 1, size = (mixcomps, d))
S = np.array([np.eye(d) * 0.01 for c in range(mixcomps)])
for s in S:
s += 0.05 * np.random.uniform(-1, 1, size = s.shape)
s[np.tril_indices(d, -1)] = s[np.triu_indices(d, 1)]
pi = np.ones((mixcomps, )) * 1.0/mixcomps
lr = LearningRules(ndim_out = d, dim = d)
lr.learnFORCEmdn_setup(mixcomps = mixcomps)
fig = plt.figure()
gs = gridspec.GridSpec(2, mixcomps)
for g in gs:
fig.add_subplot(g)
ax = fig.axes[0]
ax.plot(mu[:,0], mu[:,1], "ko", alpha = 0.5)
# ax.set_xlim((-1.1, 1.1))
# ax.set_ylim((-1.1, 1.1))
ax = fig.axes[0] # (2 * mixcomps) + c]
for i in range(1000):
y = lr.mixtureMV(mu, S, pi)
print "y", y
ax.plot([y[0]], [y[1]], "ro", alpha = 0.2, markersize = 3.0)
ax.set_aspect(1)
for c in range(mixcomps):
ax = fig.axes[mixcomps + c]
ax.imshow(S[c], cmap = plt.get_cmap('PiYG'))
fig.show()
plt.show()
print "mu", mu.shape, "S", S.shape
def updateScoreMat_wholeELBO(ScoreMat, curModel, SS, doAllPairs=0):
''' Calculate upper-tri matrix of exact ELBO gap for each candidate pair
Returns
---------
Mraw : 2D array, size K x K. Uppert tri entries carry content.
Mraw[j,k] gives the scalar ELBO gap for the potential merge of j,k
'''
K = SS.K
if doAllPairs:
AGap = curModel.allocModel.calcHardMergeGap_AllPairs(SS)
OGap = curModel.obsModel.calcHardMergeGap_AllPairs(SS)
ScoreMat = AGap + OGap
ScoreMat[np.tril_indices(SS.K)] = -np.inf
for k, uID in enumerate(SS.uIDs):
CountTracker[uID] = SS.getCountVec()[k]
nUpdated = SS.K * (SS.K - 1) / 2
else:
ScoreMat[np.tril_indices(SS.K)] = -np.inf
# Rescore only specific pairs that are positive
redoMask = ScoreMat > -1 * ELBO_GAP_ACCEPT_TOL
for k, uID in enumerate(SS.uIDs):
if CountTracker[uID] == 0:
# Always precompute for brand-new comps
redoMask[k, :] = 1
redoMask[:, k] = 1
else:
absDiff = np.abs(SS.getCountVec()[k] - CountTracker[uID])
percDiff = absDiff / (CountTracker[uID] + 1e-10)
if percDiff > 0.25:
redoMask[k, :] = 1
redoMask[:, k] = 1
CountTracker[uID] = SS.getCountVec()[k]
redoMask[np.tril_indices(SS.K)] = 0
aList, bList = np.unravel_index(np.flatnonzero(redoMask), (SS.K, SS.K))
if len(aList) > 0:
mPairIDs = zip(aList, bList)
AGap = curModel.allocModel.calcHardMergeGap_SpecificPairs(
SS, mPairIDs)
OGap = curModel.obsModel.calcHardMergeGap_SpecificPairs(
SS, mPairIDs)
ScoreMat[aList, bList] = AGap + OGap
nUpdated = len(aList)
MergeLogger.log('MERGE ScoreMat Updates: %d entries.' % (nUpdated),
level='debug')
return ScoreMat
def posthoc_tukey_hsd(x, g, alpha = 0.05):
'''Pairwise comparisons with TukeyHSD confidence intervals. This is a
convenience function to make statsmodels pairwise_tukeyhsd() method more
applicable for further use.
Parameters
----------
x : array_like or pandas Series object, 1d
An array, any object exposing the array interface, containing
the response variable. NaN values will cause an error. Please
handle manually.
g : array_like or pandas Series object, 1d
An array, any object exposing the array interface, containing
the groups' names. Can be string or integers.
alpha : float, optional
Significance level for the test. Default is 0.05.
Returns
-------
Numpy ndarray where 0 is False (not significant), 1 is True (significant),
and -1 is for diagonal elements.
Examples
--------
>>> x = [[1,2,3,4,5], [35,31,75,40,21], [10,6,9,6,1]]
>>> g = [['a'] * 5, ['b'] * 5, ['c'] * 5]
>>> sp.posthoc_tukey_hsd(np.concatenate(x), np.concatenate(g))
array([[-1, 1, 0],
[ 1, -1, 1],
[ 0, 1, -1]])
'''
result = pairwise_tukeyhsd(x, g, alpha=0.05)
groups = np.array(result.groupsunique, dtype=np.str)
groups_len = len(groups)
vs = np.arange(groups_len, dtype=np.int)[:,None].T.repeat(groups_len, axis=0)
for a in result.summary()[1:]:
a0 = str(a[0])
a1 = str(a[1])
a0i = np.where(groups == a0)[0][0]
a1i = np.where(groups == a1)[0][0]
vs[a0i, a1i] = 1 if str(a[5]) == 'True' else 0
vs = np.triu(vs)
np.fill_diagonal(vs, -1)
tri_lower = np.tril_indices(vs.shape[0], -1)
vs[tri_lower] = vs.T[tri_lower]
return vs