def plot_bar_mapq(self, fontsize=16, filename=None):
"""Plots bar plots of the MAPQ (quality) of alignments
.. plot::
:include-source:
from sequana import BAM, sequana_data
b = BAM(sequana_data('test.bam', "testing"))
b.plot_bar_mapq()
"""
df = self.get_mapq_as_df()
df.plot(kind='hist', bins=range(0,df.max().values[0]+1), legend=False,
grid=True, logy=True)
pylab.xlabel("MAPQ", fontsize=fontsize)
pylab.ylabel("Count", fontsize=fontsize)
try:
# This may raise issue on MAC platforms
pylab.tight_layout()
except:
pass
if filename:
pylab.savefig(filename)
python类hist()的实例源码
def hist_coverage(self, bins=100):
"""
.. plot::
:include-source:
from sequana import sequana_data, BAM
b = BAM(sequana_data("measles.fa.sorted.bam"))
b.hist_coverage()
"""
try: self.coverage
except: self.set_fast_stats()
pylab.hist(self.coverage, bins=bins)
pylab.xlabel("Coverage")
pylab.ylabel("Number of mapped bases")
pylab.grid()
def plotmatchdisthist(M, mas=True, nbins=100, doclf=True, color='b', **kwa):
import pylab as plt
if doclf:
plt.clf()
R = np.sqrt(M.dra_arcsec**2 + M.ddec_arcsec**2)
if mas:
R *= 1000.
rng = [0, M.rad*1000.]
else:
rng = [0, M.rad]
print 'Match distances: median', np.median(R), 'arcsec'
n,b,p = plt.hist(R, nbins, range=rng, histtype='step', color=color, **kwa)
if mas:
plt.xlabel('Match distance (mas)')
else:
plt.xlabel('Match distance (arcsec)')
plt.xlim(*rng)
return n,b,p
def plotHistPopScore(population, fitness=False):
""" Population score distribution histogram
Example:
>>> Interaction.plotHistPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
n, bins, patches = pylab.hist(score_list, 50, facecolor='green', alpha=0.75, normed=1)
pylab.plot(bins, pylab.normpdf(bins, numpy.mean(score_list), numpy.std(score_list)), 'r--')
pylab.xlabel('Score')
pylab.ylabel('Frequency')
pylab.grid(True)
pylab.title("Plot of population score distribution")
pylab.show()
# -----------------------------------------------------------------
def plotHistPopScore(population, fitness=False):
""" Population score distribution histogram
Example:
>>> Interaction.plotHistPopScore(population)
:param population: population object (:class:`GPopulation.GPopulation`)
:param fitness: if True, the fitness score will be used, otherwise, the raw.
:rtype: None
"""
score_list = getPopScores(population, fitness)
n, bins, patches = pylab.hist(score_list, 50, facecolor='green', alpha=0.75, normed=1)
pylab.plot(bins, pylab.normpdf(bins, numpy.mean(score_list), numpy.std(score_list)), 'r--')
pylab.xlabel('Score')
pylab.ylabel('Frequency')
pylab.grid(True)
pylab.title("Plot of population score distribution")
pylab.show()
# -----------------------------------------------------------------
def DrawHist(pl, shs):
"""??????, shs: ??? array"""
shs = np.array(shs, dtype=float)
#print "mean: %.2f"%shs.mean()
shs = shs[np.isnan(shs) == False]
if len(shs)>0:
pl.figure
pl.hist(shs)
def ShowHitCount(shs):
#????
go_count = len(shs) - len(shs[np.isnan(shs)])
#???
if len(shs) != 0:
v = float(go_count)/ float(len(shs))
#print("trade rato:%.2f%%"%(v*100))
#?????
if go_count>0:
v = float(len(shs[shs>0]))/float(go_count)
#print("win rato: %.2f%%"%(v*100))
pl.show()
#ShowHitCount(shs)
def data_loop(self):
import pylab
fig = pylab.figure()
pylab.ion()
while True:
fig.clear()
#pylab.plot(self.t[np.where(self.on==0)])
hz = 1000000 / self.delta
pylab.hist(hz, 50, range=(800, 1200))
pylab.xlim(500, 1500)
pylab.ylim(0, 100)
self.delta = self.delta[:0]
fig.canvas.draw()
fig.canvas.flush_events()
def plotAgainstGFP_hist2d(self):
fig1 = pylab.figure(figsize = (20, 15))
print len(self.GFP)
for i in xrange(min(len(data.cat), 4)):
print len(self.GFP[self.categories == i])
vect = []
pylab.subplot(2,2,i+1)
pop = self.GFP[self.categories == i]
print "cat", i, "n pop", len(self.GFP[(self.categories == i) & (self.GFP > -np.log(12.5))])
H, xedges, yedges = np.histogram2d(self.angles[self.categories == i], self.GFP[self.categories == i], bins = 10)
hist = pylab.hist2d(self.GFP[self.categories == i], self.angles[self.categories == i], bins = 10, cmap = pylab.cm.Reds, normed = True)
pylab.clim(0.,0.035)
pylab.colorbar()
pylab.title(data.cat[i])
pylab.xlabel('GFP score')
pylab.ylabel('Angle (degree)')
pylab.xlim([-4.2, -1])
pylab.show()
def get_mapped_read_length(self):
"""Return dataframe with read length for each read
.. plot::
from pylab import hist
from sequana import sequana_data, BAM
b = BAM(sequana_data("test.bam"))
hist(b.get_mapped_read_length())
"""
read_length = [read.reference_length for read in self
if read.is_unmapped is False]
return read_length
def plot_gc_content(self, fontsize=16, ec="k", bins=100):
"""plot GC content histogram
:params bins: a value for the number of bins or an array (with a copy()
method)
:param ec: add black contour on the bars
.. plot::
:include-source:
from sequana import BAM, sequana_data
b = BAM(sequana_data('test.bam'))
b.plot_gc_content()
"""
data = self.get_gc_content()
try:
X = np.linspace(0, 100, bins)
except:
X = bins.copy()
pylab.hist(data, X, normed=True, ec=ec)
pylab.grid(True)
mu = pylab.mean(data)
sigma = pylab.std(data)
X = pylab.linspace(X.min(), X.max(), 100)
pylab.plot(X, pylab.normpdf(X, mu, sigma), lw=2, color="r", ls="--")
pylab.xlabel("GC content", fontsize=16)
def plotfitquality(H, xe, ye, A):
'''
H,xe,ye from plotalignment()
'''
import pylab as plt
xe /= 1000.
ye /= 1000.
xx = (xe[:-1] + xe[1:])/2.
yy = (ye[:-1] + ye[1:])/2.
XX,YY = np.meshgrid(xx, yy)
XX = XX.ravel()
YY = YY.ravel()
XY = np.vstack((XX,YY)).T
Mdist = np.sqrt(mahalanobis_distsq(XY, A.mu, A.C))
assert(len(H.ravel()) == len(Mdist))
mod = A.getModel(XX, YY)
R2 = XX**2 + YY**2
mod[R2 > (A.match.rad)**2] = 0.
mod *= (H.sum() / mod.sum())
plt.clf()
rng = (0, 7)
plt.hist(Mdist, 100, weights=H.ravel(), histtype='step', color='b', label='data', range=rng)
plt.hist(Mdist, 100, weights=mod, histtype='step', color='r', label='model', range=rng)
plt.xlabel('| Chi |')
plt.ylabel('Number of matches')
plt.title('Gaussian peak fit quality')
plt.legend(loc='upper right')
def histlog10(x, **kwargs):
import pylab as plt
I = (x > 0)
L = np.log10(x[I])
plt.clf()
plt.hist(L, **kwargs)
def ex3():
x = np.random.randn(1000)
# Boxplot
#plt.boxplot(x)
# Histogram
plt.hist(x)
plt.title("Mon Titre")
plt.show()
def check_pdf(df):
df_num = df.select_dtypes(include=[np.float, np.int])
for index in df_num.columns:
try:
if index in ['LOG_BULL_RETURN', 'LOG_BEAR_RETURN','RTISf', 'TOTAL_SCANNED_MESSAGES_DIFF', 'TOTAL_SENTIMENT_MESSAGES_DIFF']:
h = df_num[index][1:].sort_values().values
fit = s.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed
P.plot(h,fit,'-o')
P.hist(h,normed=True) #use this to draw histogram of your data
P.title(index)
P.show()
# fig = sm.graphics.tsa.plot_acf(df_num[index][1:],lags=40)
# plt.title(index)
elif index in ['LOG_BULL_BEAR_RATIO']:
h = df_num[index][1:].sort_values().values
fit = s.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed
P.plot(h,fit,'-o')
P.hist(h,normed=True) #use this to draw histogram of your data
P.title(index)
P.show()
else:
h = df_num[index][1:].sort_values().values
fit = s.norm.pdf(h, np.mean(h), np.std(h)) #this is a fitting indeed
P.plot(h,fit,'-o')
P.hist(h,normed=True) #use this to draw histogram of your data
P.title(index)
P.show()
except:
print index, "error"
# check autocorrelation
# provide a range of graphics which diagramatically show spikes for autocorrelation
def display_histogram_scipy(bench, mean, bins):
values = bench.get_values()
values = sorted(values)
if mean:
fit = stats.norm.pdf(values, bench.mean(), bench.stdev())
pylab.plot(values, fit, '-o', label='mean-stdev')
else:
fit = stats.norm.pdf(values, bench.mean(), bench.stdev())
pylab.plot(values, fit, '-o', label='mean-stdev')
plt.legend(loc='upper right', shadow=True, fontsize='x-large')
pylab.hist(values, bins=bins, normed=True)
pylab.show()
def layer(self, x, y):
""""""
#n = np.random.randn(1000)
#plt.hist(n, 100)
plt.plot(x, y, 'r')
#----------------------------------------------------------------------
def hist(self, *args, **kwargs):
pl.hist(*args, **kwargs)
def histogram(title, title_x, title_y,
x, bins_x):
"""
Plot a basic histogram.
"""
pylab.figure()
pylab.hist(x, bins_x)
pylab.xlabel(title_x)
pylab.ylabel(title_y)
pylab.title(title)
############################################################
def draw_sum_slices(hist, **kwargs):
return draw_slices(hist,func=np.sum, **kwargs)
def draw_max_slices(hist, **kwargs):
return draw_slices(hist,func=np.max, **kwargs)
def plot_distributions(sortedlist, title):
fit = stats.norm.pdf(sortedlist, np.mean(sortedlist), np.std(sortedlist)) #this is a fitting indeed
pl.title(title)
pl.plot(sortedlist,fit,'-o')
pl.hist(sortedlist,normed=True) #use this to draw histogram of your data
pl.savefig(title)
pl.show() #use may also need add this
def histplot(self, extradataA = [], extradataG = [], intensity = []):
pylab.figure(figsize = (25,8))
cat = ['NT, 500ng/mL DOX', 'DLG siRNA, 500ng/mL DOX', 'NuMA siRNA, 500ng/mL DOX', 'NT, 1ug/mL DOX']
pops = []
for i in xrange(3):
pylab.subplot(1,3,i+1)
pop = self.angles[(self.categories == i)]# & (self.GFP > -np.log(12.5))]# & (intensity == 'r')]
print "cat {0}, pop {1}, pop + GFP {2}".format(i, len(self.angles[self.categories == i]), len(pop))
pops.append(pop)
hist, binedges = np.histogram(pop, bins = 18)
pylab.tick_params(axis='both', which='major', labelsize=25)
pylab.plot(binedges[:-1], np.cumsum(hist)/1./len(pop), data.colors[i], label = data.cat[i], linewidth = 4)
if len(extradataA) > i:
print extradataA[i]
h, bins = np.histogram(extradataA[i], bins= 18)
hbis = h/1./len(extradataA[i])
x, y = [], []
for index in xrange(len(hbis)):
x.extend([bins[index], bins[index+1]])
y.extend([hbis[index], hbis[index]])
print x, y, len(x)
pylab.tick_params(axis='both', which='major', labelsize=25)
pylab.plot(bins[:-1], np.cumsum(h)/1./len(extradataA[i]), 'k', linewidth = 4)
pylab.xlabel("Angle (degre)", fontsize = 25)
#pylab.title(cat[i])
pylab.ylim([0., 1.2])
pylab.legend(loc = 2, prop = {'size' : 20})
for ip, p in enumerate(pops):
for ip2, p2 in enumerate(pops):
ksstat, kspval = scipy.stats.ks_2samp(p2, p)
print "#### cat{0} & cat{3} : ks Stat {1}, pvalue {2}".format(ip, ksstat, kspval, ip2)
pylab.show()
#pylab.savefig("{0}hist.png".format(dirpath, nbins, 2, randint(0,999), dirpath))
def plotAgainstGFP(self, extradataA = [], extradataG = [], intensity = [], seq = []):
fig1 = pylab.figure(figsize = (25, 10))
print len(self.GFP)
for i in xrange(min(len(data.cat), 3)):
print len(self.GFP[self.categories == i])
vect = []
pylab.subplot(1,3,i+1)
#pylab.hist(self.GFP[self.categories == i], bins = 20, color = data.colors[i])
pop = self.GFP[self.categories == i]
pylab.plot(self.GFP[self.categories == i], self.angles[self.categories == i], data.colors[i]+'o', markersize = 8)#, label = data.cat[i])
print "cat", i, "n pop", len(self.GFP[(self.categories == i) & (self.GFP > -np.log(12.5))])
x = np.linspace(np.min(self.GFP[self.categories == i]), np.percentile(self.GFP[self.categories == i], 80),40)
#fig1.canvas.mpl_connect('pick_event', onpick)
for j in x:
vect.append(np.median(self.angles[(self.GFP > j) & (self.categories == i)]))
pylab.plot([-4.5, -0.5], [vect[0], vect[0]], data.colors[i], label = "mediane de la population entiere", linewidth = 5)
print vect[0], vect[np.argmax(x > -np.log(12.5))]
pylab.plot([-np.log(12.5), -0.5], [vect[np.argmax(x > -np.log(12.5))] for k in [0,1]], data.colors[i], label = "mediane de la population de droite", linewidth = 5, ls = '--')
pylab.axvline(x = -np.log(12.5), color = 'm', ls = '--', linewidth = 3)
pylab.xlim([-4.5, -0.5])
pylab.legend(loc = 2, prop = {'size':17})
pylab.title(data.cat[i].split(',')[0], fontsize = 24)
pylab.xlabel('score GFP', fontsize = 20)
pylab.ylabel('Angle (degre)', fontsize = 20)
pylab.tick_params(axis='both', which='major', labelsize=20)
pylab.ylim([-5, 105])
##pylab.xscale('log')
pylab.show()
def bootstrap(self, nBoot, nbins = 20):
pops = np.zeros((nBoot, nbins))
#medianpop = [[] for i in data.cat]
pylab.figure(figsize = (20,14))
for i in xrange(3):
pylab.subplot(1,3,i+1)
#if i ==0:
#pylab.title("Bootstrap on medians", fontsize = 20.)
pop = self.angles[(self.categories == i)]# & (self.GFP > 2000)]
for index in xrange(nBoot):
newpop = np.random.choice(pop, size=len(pop), replace=True)
#medianpop[i].append(np.median(newpop))
newhist, binedges = np.histogram(newpop, bins = nbins)
pops[index,:] = newhist/1./len(pop)
#pylab.hist(medianpop[i], bins = nbins, label = "{2} median {0:.1f}, std {1:.1f}".format(np.median(medianpop[i]), np.std(medianpop[i]), data.cat[i]), color = data.colors[i], alpha =.2, normed = True)
meanpop = np.sum(pops, axis = 0)/1./nBoot
stdY = np.std(pops, axis = 0)
print "width", binedges[1] - binedges[0]
pylab.bar(binedges[:-1], meanpop, width = binedges[1] - binedges[0], label = "mean distribution", color = data.colors[i], alpha = 0.6)
pylab.fill_between((binedges[:-1]+binedges[1:])/2., meanpop-stdY, meanpop+stdY, alpha = 0.3)
pylab.legend()
pylab.title(data.cat[i])
pylab.xlabel("Angle(degree)", fontsize = 15)
pylab.ylim([-.01, 0.23])
pylab.savefig("/users/biocomp/frose/frose/Graphics/FINALRESULTS-diff-f3/distrib_nBootstrap{0}_bins{1}_GFPsup{2}_{3}.png".format(nBoot, nbins, 'all', randint(0,999)))
def draw_slices(hist, func=np.sum, **kwargs):
""" Draw horizontal and vertical slices through histogram """
from mpl_toolkits.axes_grid1 import make_axes_locatable
kwargs.setdefault('ls','-')
ax = plt.gca()
data = hist
# Slices
vslice = func(data,axis=0)
hslice = func(data,axis=1)
npix = np.array(data.shape)
#xlim,ylim = plt.array(zip([0,0],npix-1))
xlim = ax.get_xlim()
ylim = ax.get_ylim()
#extent = ax.get_extent()
#xlim =extent[:2]
#ylim = extent[2:]
# Bin centers
xbin = np.linspace(xlim[0],xlim[1],len(vslice))#+0.5
ybin = np.linspace(ylim[0],ylim[1],len(hslice))#+0.5
divider = make_axes_locatable(ax)
#gh2 = pywcsgrid2.GridHelperSimple(wcs=self.header, axis_nums=[2, 1])
hax = divider.append_axes("right", size=1.2, pad=0.05,sharey=ax,
axes_class=axes_divider.LocatableAxes)
hax.axis["left"].toggle(label=False, ticklabels=False)
#hax.plot(hslice, plt.arange(*ylim)+0.5,'-') # Bin center
hax.plot(hslice, ybin, **kwargs) # Bin center
hax.xaxis.set_major_locator(MaxNLocator(4,prune='both'))
hax.set_ylim(*ylim)
#gh1 = pywcsgrid2.GridHelperSimple(wcs=self.header, axis_nums=[0, 2])
vax = divider.append_axes("top", size=1.2, pad=0.05, sharex=ax,
axes_class=axes_divider.LocatableAxes)
vax.axis["bottom"].toggle(label=False, ticklabels=False)
vax.plot(xbin, vslice, **kwargs)
vax.yaxis.set_major_locator(MaxNLocator(4,prune='lower'))
vax.set_xlim(*xlim)
return vax,hax
def drawKernelHist(ax, kernel):
ext = kernel.extension
theta = kernel.theta
lon, lat = kernel.lon, kernel.lat
xmin,xmax = -5*ext,5*ext
ymin,ymax = -5*ext,5*ext,
x = np.linspace(xmin,xmax,100)+kernel.lon
y = np.linspace(ymin,ymax,100)+kernel.lat
xx,yy = np.meshgrid(x,y)
zz = kernel.pdf(xx,yy)
im = ax.imshow(zz)#,extent=[xmin,xmax,ymin,ymax])
hax,vax = draw_slices(ax,zz,color='k')
mc_lon,mc_lat = kernel.sample(1e5)
hist,xedges,yedges = np.histogram2d(mc_lon,mc_lat,bins=[len(x),len(y)],
range=[[x.min(),x.max()],[y.min(),y.max()]])
xbins,ybins = np.arange(hist.shape[0])+0.5,np.arange(hist.shape[1])+0.5
vzz = zz.sum(axis=0)
hzz = zz.sum(axis=1)
vmc = hist.sum(axis=0)
hmc = hist.sum(axis=1)
vscale = vzz.max()/vmc.max()
hscale = hzz.max()/hmc.max()
kwargs = dict(marker='.',ls='',color='r')
hax.errorbar(hmc*hscale, ybins, xerr=np.sqrt(hmc)*hscale,**kwargs)
vax.errorbar(xbins, vmc*vscale,yerr=np.sqrt(vmc)*vscale,**kwargs)
ax.set_ylim(0,len(y))
ax.set_xlim(0,len(x))
#try: ax.cax.colorbar(im)
#except: pylab.colorbar(im)
#a0 = np.array([0.,0.])
#a1 =kernel.a*np.array([np.sin(np.deg2rad(theta)),-np.cos(np.deg2rad(theta))])
#ax.plot([a0[0],a1[0]],[a0[1],a1[1]],'-ob')
#
#b0 = np.array([0.,0.])
#b1 =kernel.b*np.array([np.cos(np.radians(theta)),np.sin(np.radians(theta))])
#ax.plot([b0[0],b1[0]],[b0[1],b1[1]],'-or')
label_kwargs = dict(xy=(0.05,0.05),xycoords='axes fraction', xytext=(0, 0),
textcoords='offset points',ha='left', va='bottom',size=10,
bbox={'boxstyle':"round",'fc':'1'}, zorder=10)
norm = zz.sum() * (x[1]-x[0])**2
ax.annotate("Sum = %.2f"%norm,**label_kwargs)
#ax.set_xlabel(r'$\Delta$ LON (deg)')
#ax.set_ylabel(r'$\Delta$ LAT (deg)')
###################################################
def doit(csvfile):
sensordata=[]
timestamplist=[]
if 1:#with open(fname, 'rb') as csvfile:
for t in range(args.skip_lines): csvfile.readline()
reader = csv.reader(csvfile, delimiter=args.delimiter)
for a in reader:
if a==[]: continue # empty line
try:
values = [float(t.replace(',','.')) for t in a if t !='']
except Exception,e:
print a, e
continue
if args.columns:
values = [values[t] for t in args.columns]
if args.timestamps:
sensordata.append(values[1:])
timestamplist.append(values[0])
else:
sensordata.append(values)
if args.histogram:
import matplotlib.mlab as mlab
mu = mlab.np.average(sensordata)
sigma = max(abs(mlab.np.max(sensordata)- mu), abs(mlab.np.min(sensordata)- mu))
# the histogram of the data
n, bins, patches = pylab.hist(mlab.np.array(sensordata), 100, normed=True, facecolor='green', alpha=0.75)
pylab.grid()
pylab.show()
if args.output_file_name:
outfile = open(args.output_file_name,'w')
for line in sensordata:
outfile.write(args.output_delimiter.join([args.output_formatter % round(t*args.output_multiplier) for t in line])+'\n')
else:
if timestamplist!=[]: # data with timestamp
pylab.plot(timestamplist, sensordata, args.tick_mark)
pylab.xlabel('time')
else:
pylab.plot(sensordata, args.tick_mark)
pylab.xlabel('sample #')
pylab.title(csvfile.name)
if args.legend:
pylab.legend(args.legend)
pylab.grid()
pylab.show()