def correlation(self, vec_col, method="pearson"):
    """
    Plot the correlation matrix of a column of Vectors as a seaborn heatmap.

    The matrix is computed with pyspark.ml.stat.Correlation over ``self._df``.

    :param vec_col: The name of the column of vectors for which the correlation coefficient needs to be
        computed. This must be a column of the dataset, and it must contain Vector objects.
    :param method: String specifying the method to use for computing correlation. Supported: pearson
        (default), spearman.
    :return: Heatmap plot of the corr matrix using seaborn.
    :raises AssertionError: if ``method`` is not a string or is not one of the supported names.
    """
    # Raised explicitly instead of via ``assert`` so the validation is not stripped
    # under ``python -O``; the exception type callers see is unchanged.
    if not isinstance(method, str):
        raise AssertionError("Error, method argument provided must be a string.")
    if method not in ('pearson', 'spearman'):
        raise AssertionError("Error, method only can be 'pearson' or 'spearman'.")

    cor = Correlation.corr(self._df, vec_col, method).head()[0].toArray()

    # All-False mask: every cell of the matrix is drawn. The builtin ``bool`` is used
    # because the ``np.bool`` alias no longer exists in current NumPy releases.
    no_mask = np.zeros_like(cor, dtype=bool)
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    return sns.heatmap(cor, mask=no_mask, cmap=cmap)
python类diverging_palette()的实例源码
def plot_heatmaps(img_arr, img_names, titles, heatmaps, labels, out_dir):
    """Overlay each heatmap on its image and save the result per class.

    The five per-image sequences are iterated in lockstep with ``zip``, so
    iteration stops at the shortest one.

    :param img_arr: images; each is transposed with axes ``(1, 2, 0)`` before
        display (presumably channel-first input -- confirm with the caller).
    :param img_names: file name used for each saved figure.
    :param titles: title drawn above each figure.
    :param heatmaps: 2-D maps drawn semi-transparently over the images.
    :param labels: indices into the module-level ``CLASSES`` sequence.
    :param out_dir: root directory; ``make_sub_dir(out_dir, class_name)``
        supplies the per-class directory the figure is written to.
    """
    # construct cmap
    pal = sns.diverging_palette(240, 10, n=30, center="dark")
    my_cmap = ListedColormap(sns.color_palette(pal).as_hex())

    for img, img_name, h_map, title, y in zip(img_arr, img_names, heatmaps, titles, labels):
        fig = plt.figure()
        try:
            plt.imshow(np.transpose(img, (1, 2, 0)), cmap='Greys', interpolation='bicubic')
            plt.imshow(h_map, cmap=my_cmap, alpha=0.7, interpolation='nearest')
            plt.colorbar()
            plt.axis('off')
            plt.title(title)

            class_dir = make_sub_dir(out_dir, CLASSES[y])
            plt.savefig(join(class_dir, img_name), bbox_inches='tight', dpi=300)
        finally:
            # One figure is created per image; close it so a long batch does not
            # keep every figure alive in pyplot's registry.
            plt.close(fig)
def sns_triangle(matrix, plt_title, only_class=None):
    """Draw the lower triangle of a square matrix as a heatmap and save it.

    The figure is written to ``images/triangle<only_class>.png``.

    :param matrix: square 2-D data (a DataFrame or anything ``np.asarray``
        accepts).
    :param plt_title: title of the plot.
    :param only_class: optional string appended to the output file name;
        ``None`` means no suffix.
    """
    sns.set(style="white")

    # ``DataFrame.as_matrix()`` no longer exists in current pandas; ``np.asarray``
    # yields the same values and also accepts plain arrays.
    values = np.asarray(matrix)

    # Generate a mask for the upper triangle (diagonal included). Builtin ``bool``
    # replaces the removed ``np.bool`` alias.
    mask = np.zeros_like(values, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    _fig, ax = subplots(figsize=(11, 9))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(values, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=5, yticklabels=5,
                linewidths=.5, cbar_kws={"shrink": .5}, ax=ax)
    title(plt_title)
    xlabel('Preprocessed Features')
    ylabel('Preprocessed Features')

    suffix = '' if only_class is None else only_class
    savefig('images/triangle' + suffix + '.png')
def plot_corr_heatmap(corr, labels, heading):
    """Show the annotated lower triangle of a correlation matrix.

    :param corr: square correlation matrix.
    :param labels: tick labels used on both axes.
    :param heading: title set on the axes.
    """
    sns.set(style="white")

    # Generate a mask for the upper triangle (diagonal included). Builtin ``bool``
    # replaces the ``np.bool`` alias, which current NumPy no longer provides.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    _fig, ax = plt.subplots(figsize=(8, 8))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3,
                square=True, xticklabels=labels, yticklabels=labels,
                linewidths=.5, ax=ax, cbar_kws={"shrink": .5}, annot=True)
    ax.set_title(heading)
    plt.show()
def plot_difference_matrix(fname, confmat1, confmat2, target_names,
                           title='', cmap='Blues', perc=True, figsize=[5, 4], cbar=True,
                           **kwargs):
    """Plot the difference between two row-normalised confusion matrices.

    Both matrices are converted to row percentages and subtracted
    (``confmat2 - confmat1``). Only the diagonal of the difference drives the
    cell colours; the annotations show the difference for every cell. The
    figure is saved to ``plots/<fname>``.

    :param fname: output file name inside the ``plots`` directory.
    :param confmat1: baseline confusion matrix (array with ``astype``/``sum``).
    :param confmat2: confusion matrix compared against the baseline.
    :param target_names: class names used for both rows and columns.
    :param title: title of the plot.
    :param cmap: accepted for interface compatibility; currently unused, the
        colormap is fixed to ``'coolwarm_r'``.
    :param perc: accepted for interface compatibility; currently unused.
    :param figsize: ``[width, height]`` in inches; any two-item sequence works.
    :param cbar: whether to draw the colorbar. Without it the figure is made
        0.6 inches narrower.
    :param kwargs: forwarded to ``sns.heatmap``.
    """
    # Copy into a fresh list so neither the caller's object nor the shared default
    # is mutated, and so that tuples are accepted as well.
    figsize = list(figsize)
    if not cbar:
        figsize[0] = figsize[0] - 0.6

    # Row-normalise to percentages.
    cm1 = 100 * confmat1.astype('float') / confmat1.sum(axis=1)[:, np.newaxis]
    cm2 = 100 * confmat2.astype('float') / confmat2.sum(axis=1)[:, np.newaxis]
    cm = cm2 - cm1

    # Colour only the diagonal: off-diagonal cells stay at zero.
    cm_eye = np.zeros_like(cm)
    cm_eye[np.eye(len(cm_eye), dtype=bool)] = cm.diagonal()
    df = pd.DataFrame(data=cm_eye, columns=target_names, index=target_names)

    plt.figure(figsize=figsize)
    g = sns.heatmap(df, annot=cm, fmt=".1f",
                    linewidths=.5, vmin=-10, vmax=10,
                    cmap='coolwarm_r', annot_kws={"size": 13}, cbar=cbar, **kwargs)
    g.set_title(title)
    g.set_ylabel('True sleep stage', fontdict={'fontsize': 12, 'fontweight': 'bold'})
    g.set_xlabel('Predicted sleep stage', fontdict={'fontsize': 12, 'fontweight': 'bold'})
    plt.tight_layout()
    g.figure.savefig(os.path.join('plots', fname))
two_sigma_financial_modelling.py 文件源码
项目:PortfolioTimeSeriesAnalysis
作者: MizioAnd
项目源码
文件源码
阅读 27
收藏 0
点赞 0
评论 0
def dendrogram(df, number_of_clusters=None):
    """Show a clustered heatmap of the column correlations of ``df``.

    :param df: DataFrame whose columns are correlated and clustered.
    :param number_of_clusters: number of colours generated for the side
        palette. Defaults to ``int(df.shape[1] / 1.2)``.
    """
    # The default must be computed here: a default expression in the signature is
    # evaluated once at definition time, when no ``df`` exists yet.
    if number_of_clusters is None:
        number_of_clusters = int(df.shape[1] / 1.2)

    used_networks = np.arange(0, number_of_clusters, dtype=int)

    # Create a custom palette to identify the networks
    network_pal = sns.cubehelix_palette(len(used_networks),
                                        light=.9, dark=.1, reverse=True,
                                        start=1, rot=-2)
    # zip() stops at the shorter input, so only the first len(network_pal)
    # columns receive a colour.
    network_lut = dict(zip(map(str, df.columns), network_pal))

    # Convert the palette to vectors that will be drawn on the side of the matrix
    networks = df.columns.get_level_values(None)
    network_colors = pd.Series(networks, index=df.columns).map(network_lut)

    sns.set(font="monospace")

    # Create custom colormap
    cmap = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    cg = sns.clustermap(df.astype(float).corr(), cmap=cmap, linewidths=.5,
                        row_colors=network_colors, col_colors=network_colors)
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    plt.show()
def __init__(self, parent, data, labels, width=6, height=6, dpi=100):
    """Build a correlation plot widget for ``data``.

    The lower triangle is drawn as a heatmap, variable names are written on
    the diagonal and the correlation values are written in the upper triangle.

    :param parent: parent passed to ``setParent``.
    :param data: 2-D data; columns are the variables being correlated.
    :param labels: column names for ``data``.
    :param width: figure width in inches.
    :param height: figure height in inches.
    :param dpi: figure resolution.
    """
    figure = Figure(figsize=(width, height), dpi=dpi, tight_layout=True)
    axes = figure.add_subplot(111)

    super(CorrelationPlot, self).__init__(figure)
    self.setParent(parent)

    sns.set(style="darkgrid")

    df = pd.DataFrame(data=data, columns=labels)
    corr = df.corr()

    # Generate a mask for the upper triangle (diagonal included). Builtin ``bool``
    # replaces the ``np.bool`` alias, which current NumPy no longer provides.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Symmetric colour range taken from the visible (unmasked) cells only.
    vmax = np.abs(corr.values[~mask]).max()

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=plt.cm.PuOr, vmin=-vmax, vmax=vmax,
                square=True, linecolor="lightgray", linewidths=1, ax=axes)

    for i in range(len(corr)):
        # Variable name on the diagonal cell.
        axes.text(i + 0.5, i + 0.5, corr.columns[i],
                  ha="center", va="center", rotation=0)
        # Numeric correlation in the masked upper triangle of row i.
        for j in range(i + 1, len(corr)):
            s = "{:.3f}".format(corr.values[i, j])
            axes.text(j + 0.5, i + 0.5, s,
                      ha="center", va="center")
    axes.axis("off")

    # Let the widget expand to fill the available space.
    self.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
    self.updateGeometry()
    self.setMinimumSize(self.size())
def radiocolorf(freq):
    """Return the hex colour assigned to a radio code.

    ``freq`` is converted to float and rescaled linearly so that 1 maps to 0.0
    and 45 maps to 1.0; that position is looked up in a dark-centred diverging
    seaborn colormap.
    """
    lowest, highest = 1.0, 45.0
    position = (float(freq) - lowest) / (highest - lowest)
    colormap = sns.diverging_palette(200, 60, l=80, as_cmap=True, center="dark")
    rgba = colormap(position)
    return rgb2hex(rgba)
def dendrogram(df, number_of_clusters, agglomerated_feature_labels):
    """Show a clustered heatmap of the column correlations of ``df``.

    One palette colour is generated per distinct value in
    ``agglomerated_feature_labels`` and the colours are paired with the
    columns of ``df`` in order. ``number_of_clusters`` is accepted but not
    used.
    """
    import seaborn as sns

    # Distinct cluster labels decide how many side colours are generated.
    cluster_ids = np.unique(agglomerated_feature_labels)
    palette = sns.cubehelix_palette(len(cluster_ids),
                                    light=.9, dark=.1, reverse=True,
                                    start=1, rot=-2)

    # Column name (as a string) -> colour; pairing stops at the shorter sequence.
    column_names = [str(column) for column in df.columns]
    color_by_column = dict(zip(column_names, palette))

    # Colours drawn along the rows and columns of the cluster map.
    level_values = df.columns.get_level_values(None)
    side_colors = pd.Series(level_values, index=df.columns).map(color_by_column)

    sns.set(font="monospace")

    diverging = sns.diverging_palette(h_neg=210, h_pos=350, s=90, l=30, as_cmap=True)
    correlations = df.astype(float).corr()
    cg = sns.clustermap(correlations, cmap=diverging, linewidths=.5,
                        row_colors=side_colors, col_colors=side_colors)

    # Horizontal labels on the y axis, vertical labels on the x axis.
    for axis, angle in ((cg.ax_heatmap.yaxis, 0), (cg.ax_heatmap.xaxis, 90)):
        plt.setp(axis.get_majorticklabels(), rotation=angle)
    plt.show()
def plot_corrmat(in_csv, out_file=None):
    """Cluster and plot the correlation matrix of the columns in a CSV file.

    The columns ``subject_id``, ``site`` and ``modality`` are excluded when
    present. Rows and columns of the correlation matrix that are entirely NaN
    are dropped before plotting.

    :param in_csv: path of the CSV file to read.
    :param out_file: output path; defaults to ``corr_matrix.svg``. An extension
        other than pdf, svg or png is replaced by ``.svg``.
    :return: the object returned by ``seaborn.clustermap``.
    """
    import seaborn as sn
    sn.set(style="whitegrid")

    # NOTE(review): na_filter=False turns off missing-value detection, so
    # na_values='n/a' probably has no effect here -- confirm the intent.
    dataframe = pd.read_csv(in_csv, index_col=False, na_values='n/a', na_filter=False)

    excluded = ('subject_id', 'site', 'modality')
    colnames = [col for col in dataframe.columns if col not in excluded]

    # Correlation matrix
    corr = dataframe[colnames].corr()
    # Drop all-NaN rows, then all-NaN columns. The original tuple-axis, positional
    # call is rejected by current pandas; the chained keyword form applies the
    # same two passes in the same order.
    corr = corr.dropna(axis=0, how='all').dropna(axis=1, how='all')

    # Generate a custom diverging colormap
    cmap = sn.diverging_palette(220, 10, as_cmap=True)

    # Draw the clustered heatmap
    corrplot = sn.clustermap(corr, cmap=cmap, center=0., method='average',
                             square=True, linewidths=.5)
    plt.setp(corrplot.ax_heatmap.yaxis.get_ticklabels(), rotation='horizontal')

    if out_file is None:
        out_file = 'corr_matrix.svg'

    fname, ext = op.splitext(out_file)
    if ext[1:] not in ['pdf', 'svg', 'png']:
        ext = '.svg'
        out_file = fname + '.svg'

    corrplot.savefig(out_file, format=ext[1:], bbox_inches='tight', pad_inches=0, dpi=100)
    return corrplot
def radiocolorf(freq):
    """Map a radio code onto a hex colour string.

    The code is rescaled as ``(freq - 1) / (45 - 1)`` and used as the lookup
    position in a dark-centred diverging seaborn colormap.
    """
    scaled = (float(freq) - 1.0) / (45.0 - 1.0)
    to_rgba = sns.diverging_palette(200, 60, l=80, as_cmap=True, center="dark")
    return rgb2hex(to_rgba(scaled))
def __init__(self, echonet):
    """Attach to an ``EchoNet`` and start with empty training histories.

    :param echonet: the ``EchoNet`` instance this object works with.
    """
    assert isinstance(echonet, EchoNet)
    self.echonet = echonet

    # Each history gets its own fresh list.
    for history in ('loss', 'train_score', 'validation_score', 'test_score'):
        setattr(self, history, [])

    self.start_time = None
    # Diverging colormap kept on the instance for later use.
    self.cmap = sb.diverging_palette(220, 10, as_cmap=True)
def vis_activation(self, tweet, grads=False, activations=True, over_words=True, over_units=False):
    """Show a heatmap of the activations/gradients obtained for ``tweet``.

    The matrix and its labels come from ``self._get_activations_gradients``;
    the score from ``self.predict`` is shown in the title.
    """
    prediction = self.predict(tweet)
    matrix, row_labels, column_labels = self._get_activations_gradients(
        tweet, grads, activations, over_words, over_units)

    plt.figure(figsize=(14, 4))
    palette = sns.diverging_palette(220, 20, n=7)
    heat_ax = sns.heatmap(matrix, xticklabels=column_labels,
                          yticklabels=row_labels, cmap=palette)

    # Column labels on top, rotated vertically; row labels kept horizontal.
    heat_ax.xaxis.tick_top()
    plt.yticks(rotation=0)
    plt.xticks(rotation=90)

    score = prediction['score'].values[0]
    plt.title('Score:%s' % score)
    plt.show()
def plot_2_corr_heatmaps(corr1, corr2, labels, title1, title2):
    """Show two annotated lower-triangle correlation heatmaps side by side.

    :param corr1: correlation matrix drawn on the left.
    :param corr2: correlation matrix drawn on the right; masked with the mask
        built from ``corr1``, so both must have the same shape.
    :param labels: tick labels used on both axes of both plots.
    :param title1: title of the left plot.
    :param title2: title of the right plot.
    """
    fig = plt.figure(figsize=(9, 8))
    gs = gridspec.GridSpec(1, 2)
    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])

    sns.set(style="white")

    # Generate a mask for the upper triangle (diagonal included). Builtin ``bool``
    # replaces the ``np.bool`` alias, which current NumPy no longer provides.
    mask = np.zeros_like(corr1, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw each heatmap with the mask and correct aspect ratio
    for corr, ax, heading in ((corr1, ax1, title1), (corr2, ax2, title2)):
        sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.3,
                    square=True, xticklabels=labels, yticklabels=labels,
                    linewidths=.5, ax=ax, cbar_kws={"shrink": .3}, annot=True)
        ax.set_title(heading)

    fig.tight_layout()
    plt.show()
###############################################################################
# Attribution
###############################################################################
def heatmap(sources, refs, trans, actions, idx, atten=None, savefig=True, name='test', info=None, show=False):
    """Plot the action path (and optional attention) for one sentence pair.

    Rows are the source tokens and columns the target tokens. The path traced
    by ``actions[idx]`` is marked in the grid and annotated with ``'S'`` at the
    origin, ``'W'`` for an action equal to 0 and ``'C'`` otherwise.

    :param sources: encoded source sentences; ``sources[idx]`` must provide
        ``decode('utf8')``.
    :param refs: unused.
    :param trans: encoded translations, indexed like ``sources``.
    :param actions: per-sentence action sequences; each action is added to the
        column index and ``1 - action`` to the row index.
    :param idx: index of the sentence to plot.
    :param atten: optional per-sentence attention; its transpose is added to
        ``data[:-1, 1:]``.
    :param savefig: save the figure as a PDF under ``.images/C_<name>/``.
    :param name: suffix of the output directory.
    :param info: mapping with an ``'index'`` entry and numeric values, used to
        build the file name. Required when ``savefig`` is true.
    :param show: display the figure interactively.
    """
    source = [s.strip() for s in sources[idx].decode('utf8').replace('@@', '--').split()] + ['||']
    target = ['*'] + [s.strip() for s in trans[idx].decode('utf8').replace('@@', '--').split()] + ['||']
    action = actions[idx]

    # Explicit emptiness test: plain truthiness is ambiguous for numpy arrays.
    has_attention = atten is not None and len(atten) > 0
    if has_attention:
        attention = numpy.array(atten[idx])

    def track(acts, data, annote):
        """Mark every cell visited by the action sequence."""
        x, y = 0, 0
        for a in acts:
            x += a
            y += 1 - a
            data[y, x] = 1
            annote[y, x] = 'W' if a == 0 else 'C'
        return data, annote

    data = numpy.zeros((len(source), len(target)))
    annote = numpy.chararray(data.shape, itemsize=8)
    annote[:] = ''

    data, annote = track(action, data, annote)
    data[0, 0] = 1
    annote[0, 0] = 'S'

    if has_attention:
        data[:-1, 1:] += attention.T

    d = pd.DataFrame(data=data, columns=target, index=source)

    f, ax = plot.subplots(figsize=(11, 11))
    f.set_canvas(plot.gcf().canvas)
    g = sns.heatmap(d, ax=ax, annot=annote, fmt='s')
    g.xaxis.tick_top()
    plot.xticks(rotation=90)
    plot.yticks(rotation=0)

    if savefig:
        out_dir = '.images/C_{}'.format(name)
        if not os.path.exists(out_dir):
            # makedirs also creates the parent '.images' directory when missing.
            os.makedirs(out_dir)

        filename = 'Idx={}||'.format(info['index'])
        for w in info:
            # Compare by value: identity against a string literal is unreliable.
            if w != 'index':
                filename += '.{}={:.2f}'.format(w, float(info[w]))

        print('saving...')
        f.savefig(out_dir + '/{}'.format(filename) + '.pdf', dpi=100)

    if show:
        plot.show()

    print('plotting done.')
    plot.close()