def plot_heatmap():
    """Draw a clustermap of the loaded dispersion matrix.

    The same precomputed linkage is used for rows and columns, and the
    colour scale is fixed to the range 0.50-1.00.  After plotting, the
    column order of the clustered data is compared against the stored
    ``dendrogram_order``.
    """
    dispersion_data = load_dispersion_data()
    shared_linkage = dispersion_data["linkage"]

    sns.set_context("notebook", font_scale=1.25)

    clustermap_options = dict(
        data=dispersion_data["dispersion"],
        row_linkage=shared_linkage,
        col_linkage=shared_linkage,
        vmin=0.50,
        vmax=1.00,
        cmap=cmap_clustermap,
        figsize=(12, 10),
    )
    grid = sns.clustermap(**clustermap_options)

    # Sanity check: the plotted dendrogram order must match the saved values.
    plotted_order = grid.data2d.columns
    assert (plotted_order == dispersion_data["dendrogram_order"]).all()
# Python clustermap() example source code
def process(self, obj_data):
    '''
    Draw a clustered heat map of this item's results and store the linkage.

    @param obj_data: Data wrapper; its getResults() is read and the
                     linkage is written back through addResult()
    '''
    import seaborn as sns

    results = obj_data.getResults()[self.obj_name]

    # Average-linkage hierarchy, used for both the rows and the columns.
    tree = sp.cluster.hierarchy.linkage(results, method='average')

    plt.figure()
    grid = sns.clustermap(results, row_linkage=tree, col_linkage=tree)
    # Keep the row labels horizontal.
    plt.setp(grid.ax_heatmap.get_yticklabels(), rotation=0)

    # The dendrogram of the same linkage goes into a figure of its own.
    plt.figure()
    sp.cluster.hierarchy.dendrogram(tree)

    obj_data.addResult(self.str_description, tree)
def print_heatmap(points, label, id_map):
    '''
    Build a row-clustered heat map of the samples and return its figure.

    points: N_samples * N_features
    label: (int) N_samples
    id_map: map label id to its name
    '''
    # Rows are named after their label; columns are simply numbered.
    sample_names = [id_map[lab] for lab in label]
    feature_ids = list(range(points.shape[1]))
    frame = DataFrame(points, columns=feature_ids, index=sample_names)

    # One colour per sample, looked up by label id in `current_palette`
    # (defined outside this function).
    sample_colors = [current_palette[lab] for lab in label]
    heat_cmap = sns.cubehelix_palette(as_cmap=True, rot=-.3, light=1)

    grid = sns.clustermap(
        frame,
        cmap=heat_cmap,
        row_colors=sample_colors,
        col_cluster=False,
        xticklabels=False,
        yticklabels=False,
    )
    return grid.fig
def plot_matrix2(self, labels=None, **kwargs):
    """ Plot distance matrix and dendrogram using seaborn. This package
    needs to be installed manually.

    Parameters
    ----------
    labels
            Currently not used by this method.
    kwargs  dict
            Keyword arguments to be passed to seaborn.clustermap. See
            http://seaborn.pydata.org/generated/seaborn.clustermap.html

    Returns
    -------
    seaborn.clustermap

    Raises
    ------
    ImportError
            If seaborn cannot be imported.
    """
    # Only a failed import is translated; a bare ``except`` here would also
    # swallow unrelated errors such as KeyboardInterrupt.
    try:
        import seaborn as sns
    except ImportError:
        raise ImportError('Need seaborn package installed.')

    # The stored linkage is used for both rows and columns.
    cg = sns.clustermap(
        self.mat, row_linkage=self.linkage, col_linkage=self.linkage, **kwargs)

    # Rotate labels and make them smaller
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=4)
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0, fontsize=4)

    # Increase padding
    cg.fig.subplots_adjust(right=.8, top=.95, bottom=.2)

    module_logger.info(
        'Use matplotlib.pyplot.show() to render figure.')

    return cg
def plot_clustermap(sequences, title, plotpath, size=300, dpi=200):
    """
    Cluster the given sequences and save a clustermap of them.

    size -- Downsample to this many sequences
    title -- plot title (nothing is drawn when it is None)
    plotpath -- destination handed to savefig
    dpi -- resolution handed to savefig

    Return the number of clusters.
    """
    logger.info('Clustering %d sequences (downsampled to at most %d)', len(sequences), size)
    sequences = downsampled(sequences, size)
    df, linkage, clusters = cluster_sequences(sequences)

    # Palette entry 0 is dark grey; the entries after it come from 'Spectral'.
    palette = sns.color_palette([(0.15, 0.15, 0.15)])
    palette.extend(sns.color_palette('Spectral', n_colors=max(clusters), desat=0.9))
    row_colors = [palette[cid] for cid in clusters]

    side_inches = 210 / 25.4
    grid = sns.clustermap(
        df,
        row_linkage=linkage,
        col_linkage=linkage,
        row_colors=row_colors,
        linewidths=None,
        linecolor='none',
        figsize=(side_inches, side_inches),
        cmap='Blues',
        xticklabels=False,
        yticklabels=False,
    )
    if title is not None:
        grid.fig.suptitle(title)
    grid.savefig(plotpath, dpi=dpi)

    # free the memory used by the plot
    from matplotlib import pyplot as plt
    plt.close('all')

    return len(set(clusters))
# Source file: two_sigma_financial_modelling.py
# Project: PortfolioTimeSeriesAnalysis
# Author: MizioAnd
# Project source
# File source
# Views: 23
# Favorites: 0
# Likes: 0
# Comments: 0
def dendrogram(df, number_of_clusters=None):
    """Show a clustermap of the correlation matrix of *df*'s columns.

    Parameters
    ----------
    df
        Data whose columns are correlated (after casting to float).
    number_of_clusters
        Number of colours in the side-bar palette.  Defaults to
        ``int(df.shape[1] / 1.2)``, computed from the *df* that is passed in.
        (A default written in the signature would be evaluated once, when
        the function is defined, where this ``df`` does not exist.)

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    if number_of_clusters is None:
        number_of_clusters = int(df.shape[1] / 1.2)

    used_networks = np.arange(0, number_of_clusters, dtype=int)

    # Create a custom palette to identify the networks
    network_pal = sns.cubehelix_palette(len(used_networks),
                                        light=.9, dark=.1, reverse=True,
                                        start=1, rot=-2)
    # NOTE(review): zip() stops at the shorter input, so only the first
    # len(network_pal) columns get a colour, and the keys are str(column).
    # Columns missing from this lookup map to NaN below - confirm intent.
    network_lut = dict(zip(map(str, df.columns), network_pal))

    # Convert the palette to vectors that will be drawn on the side of the matrix
    networks = df.columns.get_level_values(None)
    network_colors = pd.Series(networks, index=df.columns).map(network_lut)

    sns.set(font="monospace")

    # Create custom colormap
    cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    cg = sns.clustermap(df.astype(float).corr(), cmap=cmap, linewidths=.5,
                        row_colors=network_colors, col_colors=network_colors)
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.show()
def __init__(self, path, games, logger, suffix):
    """Plot how often the most common question words occur together.

    path, suffix -- forwarded to the parent constructor together with this
                    class's name
    games        -- iterable of objects with a ``questions`` attribute
                    holding question strings
    logger       -- not used by this constructor

    Every question is split into lower-cased words.  For the (at most) 50
    most frequent words, entry [a][b] counts how often word a and a
    different word b appear in the same question.  The matrix is drawn as a
    row-clustered heat map.
    """
    super(WordCoocurence, self).__init__(path, self.__class__.__name__, suffix)

    NO_WORDS_TO_DISPLAY = 50

    # Split each question into words once; the same tokens feed both the
    # frequency count and the co-occurrence count.  (Iterating the raw
    # question string would walk over characters, not words.)
    tokenized_questions = []
    word_counter = collections.Counter()
    for game in games:
        for q in game.questions:
            q = re.sub('[?]', '', q)
            words = [w.lower() for w in re.findall(r'\w+', q)]
            tokenized_questions.append(words)
            word_counter.update(words)

    # compute word co-occurrence
    common_words = [pair[0] for pair in word_counter.most_common(NO_WORDS_TO_DISPLAY)]
    # Position lookup, so the inner loops avoid repeated list.index() scans.
    word_position = dict((word, pos) for pos, word in enumerate(common_words))

    # Size the matrix by the words actually found: there may be fewer than
    # NO_WORDS_TO_DISPLAY, and the DataFrame labels must match its shape.
    n_words = len(common_words)
    corrmat = np.zeros((n_words, n_words))

    for words in tokenized_questions:
        positions = [word_position[w] for w in words if w in word_position]
        for row in positions:
            for col in positions:
                # Distinct positions mean distinct words.
                if row != col:
                    corrmat[row][col] += 1.

    # Display the cor matrix
    df = pd.DataFrame(data=corrmat, index=common_words, columns=common_words)
    f = sns.clustermap(df, standard_scale=0, col_cluster=False, row_cluster=True, cbar_kws={"label": "co-occurence"})
    f.ax_heatmap.xaxis.tick_top()

    plt.setp(f.ax_heatmap.get_xticklabels(), rotation=90)
    plt.setp(f.ax_heatmap.get_yticklabels(), rotation=0)
def dendrogram(df, number_of_clusters, agglomerated_feature_labels):
    """Show a clustermap of the correlation matrix of *df*'s columns.

    df -- data whose columns are correlated (after casting to float)
    number_of_clusters -- not used by this function
    agglomerated_feature_labels -- cluster labels; the number of distinct
        values sets the size of the side-bar colour palette

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    import seaborn as sns

    # Only the count of distinct labels is needed, to size the palette.
    used_networks = np.unique(agglomerated_feature_labels)
    palette = sns.cubehelix_palette(
        len(used_networks),
        light=.9, dark=.1, reverse=True,
        start=1, rot=-2,
    )

    # Pair str(column) with palette colours, then turn that into one colour
    # per column for the bars drawn beside the matrix.
    color_by_column = dict(zip(map(str, df.columns), palette))
    column_values = df.columns.get_level_values(None)
    side_colors = pd.Series(column_values, index=df.columns).map(color_by_column)

    sns.set(font="monospace")

    # Custom diverging colormap for the correlation values.
    diverging = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    correlations = df.astype(float).corr()
    grid = sns.clustermap(
        correlations,
        cmap=diverging,
        linewidths=.5,
        row_colors=side_colors,
        col_colors=side_colors,
    )

    heatmap_axes = grid.ax_heatmap
    plt.setp(heatmap_axes.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(heatmap_axes.xaxis.get_majorticklabels(), rotation=90)
    plt.show()
def plot_corrmat(in_csv, out_file=None):
    """Plot a clustered correlation matrix of the columns of a CSV file.

    in_csv   -- path of the CSV file to read
    out_file -- output path; defaults to 'corr_matrix.svg'.  If its
                extension is not pdf, svg or png, the extension is replaced
                by '.svg'.

    The columns 'subject_id', 'site' and 'modality' are left out.  Rows and
    columns of the correlation matrix that are entirely NaN are dropped.

    Returns the object produced by seaborn.clustermap, after saving it.
    """
    import seaborn as sn
    sn.set(style="whitegrid")

    # NOTE(review): with na_filter=False pandas does not look for NA markers,
    # so na_values='n/a' appears to have no effect - confirm intent.
    dataframe = pd.read_csv(in_csv, index_col=False, na_values='n/a', na_filter=False)

    excluded = ('subject_id', 'site', 'modality')
    colnames = [col for col in dataframe.columns.tolist() if col not in excluded]

    # Correlation matrix, without rows/columns that are entirely NaN.
    # The two axes are dropped one after the other; a tuple ``axis`` is not
    # accepted by current pandas.
    corr = dataframe[colnames].corr()
    corr = corr.dropna(axis=0, how='all').dropna(axis=1, how='all')

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    # Draw the clustered heatmap
    corrplot = sn.clustermap(corr, cmap=cmap, center=0., method='average', square=True, linewidths=.5)
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation='horizontal')

    if out_file is None:
        out_file = 'corr_matrix.svg'

    fname, ext = op.splitext(out_file)
    if ext[1:] not in ['pdf', 'svg', 'png']:
        ext = '.svg'
        out_file = fname + '.svg'

    corrplot.savefig(out_file, format=ext[1:], bbox_inches='tight', pad_inches=0, dpi=100)
    return corrplot
def plot_clustermap(D, xticklabels=None, yticklabels=None):
    """Draw a clustermap of the 2-D matrix *D* and return the seaborn grid.

    D           -- matrix to plot; only its ``shape`` is read here
    xticklabels -- column labels; defaults to 0..n_columns-1
    yticklabels -- row labels; defaults to 0..n_rows-1

    x tick labels are rotated to 90 degrees and y tick labels to 0.
    """
    import seaborn as sns

    # x labels run along the columns (shape[1]) and y labels along the rows
    # (shape[0]).  For a square matrix both are the same length.
    if xticklabels is None:
        xticklabels = range(D.shape[1])
    if yticklabels is None:
        yticklabels = range(D.shape[0])

    zmat = sns.clustermap(
        D, yticklabels=yticklabels, xticklabels=xticklabels,
        linewidths=0.2, cmap='BuGn')
    plt.setp(zmat.ax_heatmap.get_yticklabels(), rotation=0)
    plt.setp(zmat.ax_heatmap.get_xticklabels(), rotation=90)
    return zmat
def heatmap_dendrogram(dataframe, outfile, options):
    '''
    Create a full clustered heatmap using Seaborn and save it to outfile.

    options is read for:
      corr   -- when true, method='complete' and metric='euclidean' are
                passed explicitly; otherwise seaborn's defaults apply
      hlabel -- figure title
      dpi    -- resolution used when saving
    '''
    cluster_kwargs = {}
    if options.corr:
        cluster_kwargs = dict(method='complete', metric='euclidean')
    sns_plot = sns.clustermap(dataframe, cmap="RdBu", linewidths=.3, **cluster_kwargs)

    # Work on the returned grid's own axes and figure: current seaborn no
    # longer exposes pyplot as ``sns.plt``.
    for tick_label in sns_plot.ax_heatmap.yaxis.get_majorticklabels():
        tick_label.set_rotation(0)
    sns_plot.fig.suptitle(options.hlabel)

    sns_plot.savefig(outfile, bbox_inches='tight', dpi=options.dpi)
def plot_clustermap(df):
    """Draw a clustermap of the correlation matrix of *df*.

    The row (y) tick labels of the heat map are set horizontal.  Nothing is
    returned and the figure is not shown here.
    """
    correlations = df.corr()
    grid = sns.clustermap(correlations)
    row_labels = grid.ax_heatmap.yaxis.get_majorticklabels()
    plt.setp(row_labels, rotation=0)
def _save_rotated_clustermap(grid, path):
    # Set row labels horizontal and column labels vertical, then save the figure.
    for item in grid.ax_heatmap.get_yticklabels():
        item.set_rotation(0)
    for item in grid.ax_heatmap.get_xticklabels():
        item.set_rotation(90)
    grid.fig.savefig(path, dpi=300, bbox_inches="tight")


def inspect_bulk(df, df_bulk, de_genes, de_genes_bulk):
    """Write QC plots for the bulk expression matrix under results/bulk.

    df            -- not used by this function
    df_bulk       -- bulk expression matrix (plotted under the name "bitseq")
    de_genes      -- row labels of the first signature gene set
    de_genes_bulk -- row labels of the second signature gene set

    For each quantification the function saves a box plot of the melted
    matrix.  For each gene set it then saves a row z-scored clustermap of
    the selected rows and a clustermap of the correlation between columns
    of those rows.
    """
    quant_types = [("bitseq", df_bulk)]
    # Explicit name -> genes pairs; the names are only used in file names,
    # so there is no need to eval() a variable name.
    genesets = [("de_genes", de_genes), ("de_genes_bulk", de_genes_bulk)]
    output_dir = os.path.join("results", "bulk")

    for quant_type, exp_matrix in quant_types:
        print(quant_type)

        # Boxplots of expression
        # NOTE(review): assumes melting exp_matrix yields "grna" and
        # "condition" columns - confirm against the caller.
        fig, axis = plt.subplots(1)
        sns.boxplot(data=pd.melt(exp_matrix), x="grna", y="value", hue="condition", ax=axis)
        fig.savefig(os.path.join(output_dir, "bulk_samples.qc.{}.expression_boxplots.png".format(quant_type)), dpi=300, bbox_inches="tight")

        # Heatmap and correlation on signature genes
        # (derived from scRNA and derived from bulk)
        for geneset, genes in genesets:
            # ``.ix`` no longer exists in pandas.  reindex() selects rows by
            # label and gives all-NaN rows for labels missing from the
            # matrix, which dropna() then removes.
            signature = exp_matrix.reindex(genes).dropna()

            g = sns.clustermap(
                signature,
                z_score=0,
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _save_rotated_clustermap(
                g, os.path.join(output_dir, "bulk_samples.qc.{}.{}.png".format(quant_type, geneset)))

            g = sns.clustermap(
                signature.corr(),
                row_cluster=True, col_cluster=True,
                xticklabels=True, yticklabels=True,
                figsize=(15, 15))
            _save_rotated_clustermap(
                g, os.path.join(output_dir, "bulk_samples.qc.{}.{}.correlation.png".format(quant_type, geneset)))