def plot_eigenspectrum(G, s, nvis, nhid):
with misc.gnumpy_conversion_check('allow'):
dim = G.shape[0]
d, Q = scipy.linalg.eigh(G)
d = d[::-1]
Q = Q[:, ::-1]
pts = np.unique(np.floor(np.logspace(0., np.log10(dim-1), 500)).astype(int)) - 1
cf = [fisher.correlation_fraction(Q[:, i], s, nvis, nhid) for i in pts]
pylab.figure()
pylab.subplot(2, 1, 1)
pylab.loglog(range(1, dim+1), d, 'b-', lw=2.)
pylab.xticks([])
pylab.yticks(fontsize='large')
pylab.subplot(2, 1, 2)
pylab.semilogx(pts+1, cf, 'r-', lw=2.)
pylab.xticks(fontsize='x-large')
pylab.yticks(fontsize='large')
python类semilogx()的实例源码
def plot_objfn(pos_term_info, log_Z_info, color, zoom=False, label=None):
assert np.all(pos_term_info.counts == log_Z_info.counts)
exact = not hasattr(log_Z_info, 'lower')
mean = pos_term_info.values - log_Z_info.mean
if not exact:
lower = pos_term_info.values - log_Z_info.upper
upper = pos_term_info.values - log_Z_info.lower
pylab.semilogx(pos_term_info.counts, mean, color=color, label=label)
if not exact:
pylab.errorbar(pos_term_info.counts, (lower+upper)/2., yerr=(upper-lower)/2., fmt='', ls='None', ecolor=color)
if zoom:
pylab.ylim(mean.max() - 50., mean.max() + 5.)
def plot_results(self, results, xloc, color, ls, label):
iter_counts = sorted(set([it for it, av in results.keys() if av == self.average]))
sorted_results = [results[it, self.average] for it in iter_counts]
avg = np.array([r.train_logprob() for r in sorted_results])
if hasattr(r, 'train_logprob_interval'):
lower = np.array([r.train_logprob_interval()[0] for r in sorted_results])
upper = np.array([r.train_logprob_interval()[1] for r in sorted_results])
if self.logscale:
plot_cmd = pylab.semilogx
else:
plot_cmd = pylab.plot
xloc = xloc[:len(avg)]
lw = 2.
if label not in self.labels:
plot_cmd(xloc, avg, color=color, ls=ls, lw=lw, label=label)
else:
plot_cmd(xloc, avg, color=color, ls=ls, lw=lw)
self.labels.add(label)
pylab.xticks(fontsize='xx-large')
pylab.yticks(fontsize='xx-large')
try:
pylab.errorbar(xloc, (lower+upper)/2., yerr=(upper-lower)/2., fmt='', ls='None', ecolor=color)
except:
pass
def get_required_coverage(self, M=0.01):
"""Return the required coverage to ensure the genome is covered
A general question is what should be the coverage to make sure
that e.g. E=99% of the genome is covered by at least a read.
The answer is:
.. math:: \log^{-1/(E-1)}
This equation is correct but have a limitation due to floating precision.
If one provides E=0.99, the answer is 4.6 but we are limited to a
maximum coverage of about 36 when one provides E=0.9999999999999999
after which E is rounded to 1 on most computers. Besides, it is no
convenient to enter all those numbers. A scientific notation would be better but
requires to work with :math:`M=1-E` instead of :math:`E`.
.. math:: \log^{-1/ - M}
So instead of asking the question what is the
requested fold coverage to have 99% of the genome covered, we ask the question what
is the requested fold coverage to have 1% of the genome not covered.
This allows us to use :math:`M` values as low as 1e-300 that is a fold coverage
as high as 690.
:param float M: this is the fraction of the genome not covered by
any reads (e.g. 0.01 for 1%). See note above.
:return: the required fold coverage
.. plot::
import pylab
from sequana import Coverage
cover = Coverage()
misses = np.array([1e-1, 1e-2, 1e-3, 1e-4,1e-5,1e-6])
required_coverage = cover.get_required_coverage(misses)
pylab.semilogx(misses, required_coverage, 'o-')
pylab.ylabel("Required coverage", fontsize=16)
pylab.xlabel("Uncovered genome", fontsize=16)
pylab.grid()
# The inverse equation is required fold coverage = [log(-1/(E - 1))]
"""
# What should be the fold coverage to have 99% of the genome sequenced ?
# It is the same question as equating 1-e^{-(NL/G}) == 0.99, we need NL/G = 4.6
if isinstance(M, float) or isinstance(M, int):
assert M < 1
assert M >=0
else:
M = np.array(M)
# Here we do not use log(-1/(E-1)) but log(-1/(1-E-1)) to allow
# for using float down to 1e-300 since 0.999999999999999 == 1
return np.log(-1/(-M))