def get_training_image():
    """Render a pair plot of the training data and print it as a PNG.

    The flattened training data is loaded into a DataFrame, every column
    other than ``mac`` and ``location`` is renamed with
    ``POWER_SLAVE_PREFIX`` prepended, and the pairwise plot of those
    columns (coloured by ``location``) is written to standard output as
    raw PNG bytes followed by a newline.

    Returns:
        None. The image is only emitted on standard output.
    """
    import io
    import sys

    import seaborn as sns

    df = pd.DataFrame.from_dict(get_flattened_training_data())
    features = [f for f in df.columns if f not in ('mac', 'location')]
    prefixed = [POWER_SLAVE_PREFIX + f for f in features]
    df = df.rename(columns=dict(zip(features, prefixed)))

    grid = sns.pairplot(df, hue="location", vars=prefixed)

    # PNG data is binary, so it is collected in a bytes buffer.
    png_output = io.BytesIO()
    grid.savefig(png_output, format='png')
    # Only the pair plot goes into the buffer: appending the PNG of another
    # (empty) figure would produce two concatenated image streams.
    payload = png_output.getvalue() + b'\n'

    # Python 3 exposes the binary stream as ``sys.stdout.buffer``; on
    # Python 2 ``sys.stdout`` itself accepts byte strings.
    stream = getattr(sys.stdout, 'buffer', sys.stdout)
    stream.write(payload)
    return
# Example source code using Python's pairplot()
def visualize_housing_data(df):
    """Show a pair plot and a correlation heatmap for five housing columns.

    Args:
        df: DataFrame providing the columns ``LSTAT``, ``INDUS``, ``NOX``,
            ``RM`` and ``MEDV``.

    Two figures are displayed one after the other via ``plt.show()``:
    the pairwise plots of the selected columns, then an annotated heatmap
    of their correlation coefficients.
    """
    selected = ['LSTAT', 'INDUS', 'NOX', 'RM', 'MEDV']

    # Figure 1: pairwise relationships between the selected columns.
    sns.set(style='whitegrid', context='notebook')
    subset = df[selected]
    sns.pairplot(subset, size=2.5)
    plt.show()

    # Figure 2: correlation matrix (variables as rows, hence the transpose).
    corr = np.corrcoef(subset.values.T)
    sns.set(font_scale=1.5)
    heatmap_options = dict(
        cbar=True,
        annot=True,
        square=True,
        fmt='.2f',
        annot_kws={'size': 15},
        yticklabels=selected,
        xticklabels=selected,
    )
    sns.heatmap(corr, **heatmap_options)
    plt.show()
def _save_current_figure(filename, title):
    """Save the current matplotlib figure to ``filename`` under ``title``."""
    out_dir = os.path.dirname(filename)
    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # Another process may have created it in the meantime.
            if not os.path.isdir(out_dir):
                raise
    plt.suptitle(title, y=1.01)
    plt.savefig(filename, bbox_inches='tight')


def plot_pairplots(data, labels, alpha, mis, column_label, topk=5, prefix='', focus=''):
    """Write pair-plot PDFs of the top-ranked variables of each latent factor.

    For every row ``j`` of ``mis`` the variables with ``alpha[j] > 0`` and
    ``mis[j] > 0`` are ranked by ``alpha[j] * mis[j]`` (descending) and the
    first ``topk`` are kept. If ``focus`` is one of ``column_label`` it is
    always included. Factors with fewer than two selected variables are
    skipped. Two files are written per remaining factor:

    * ``<prefix>/pairplots_regress/group_num=<j>.pdf`` -- regression pair plot
    * ``<prefix>/pairplots/group_num=<j>.pdf`` -- scatter pair plot coloured
      by ``labels[:, j]``

    Args:
        data: 2-D array, indexed as ``data[:, variable]``.
        labels: 2-D array, indexed as ``labels[:, factor]``.
        alpha: 2-D array indexed as ``alpha[factor, variable]``.
        mis: 2-D array with the same layout as ``alpha``.
        column_label: list of variable names, one per column of ``data``.
        topk: maximum number of ranked variables per factor.
        prefix: output directory; an empty prefix means the current
            working directory.
        focus: optional variable name that is forced into every plot.

    Plotting is best effort: a failure for one plot is reported with a
    warning and the remaining plots are still attempted.
    """
    import warnings

    plt.rcParams.update({'font.size': 32})
    m, _ = mis.shape
    for j in range(m):
        inds = np.where(np.logical_and(alpha[j] > 0, mis[j] > 0.))[0]
        inds = inds[np.argsort(-alpha[j, inds] * mis[j, inds])][:topk]
        if focus in column_label:
            ifocus = column_label.index(focus)
            if ifocus not in inds:
                inds = np.insert(inds, 0, ifocus)
        if len(inds) < 2:
            continue

        title = "Latent factor {}".format(j)
        pdf_name = 'group_num={}.pdf'.format(j)
        columns = [column_label[i] for i in inds]
        subdata = pd.DataFrame(data=data[:, inds], columns=columns)

        try:
            sns.pairplot(subdata, kind="reg", diag_kind="kde", size=5, dropna=True)
            # os.path.join keeps an empty prefix relative instead of
            # producing a path under the filesystem root.
            _save_current_figure(os.path.join(prefix, 'pairplots_regress', pdf_name), title)
        except Exception as err:
            warnings.warn("Regression pair plot for latent factor {} failed: {}".format(j, err))
        finally:
            # Release the figure even when plotting or saving failed.
            plt.close('all')

        subdata['Latent factor'] = labels[:, j]
        try:
            sns.pairplot(subdata, kind="scatter", dropna=True,
                         vars=subdata.columns.drop('Latent factor'),
                         hue="Latent factor", diag_kind="kde", size=5)
            _save_current_figure(os.path.join(prefix, 'pairplots', pdf_name), title)
        except Exception as err:
            warnings.warn("Scatter pair plot for latent factor {} failed: {}".format(j, err))
        finally:
            plt.close('all')
def stock():
    """Download price histories for five tickers and plot their returns.

    For each entry of the ticker table the history between 2015-01-01 and
    2016-04-16 is fetched with ``tsh.get_hist_data`` and stored as a
    module-level global named after the ticker's short name. The ``close``
    columns are inner-joined, sorted by index, and converted to
    period-over-period percentage changes. The function then prints the
    changes with their mean and standard deviation, shows several joint
    plots and a pair plot, and finally shows a scatter plot of the mean
    against the standard deviation with one annotation per ticker.
    """
    # Short name -> exchange code.
    stock_list = {"zsyh":"600036","jsyh":"601939","szzs":"000001","pfyh":"600000","msyh":"600061"}

    # Each history frame is published as a module global under its short name.
    module_scope = globals()
    for name, code in stock_list.items():
        module_scope[name] = tsh.get_hist_data(code,start="2015-01-01",end="2016-04-16")

    names = stock_list.keys()
    closes = [module_scope[name]["close"] for name in names]
    df_close = pd.concat(closes, axis=1, join='inner')
    df_close.columns = names
    # Oldest row first, so pct_change compares each row with the previous date.
    df_close.sort_index(ascending=True, inplace=True)
    pc_ret = df_close.pct_change()

    print(pc_ret)
    make_end_line()
    print(pc_ret.mean())
    make_end_line()

    # Pairwise views of the returns.
    joint_specs = (
        ("zsyh", "jsyh", "hex"),
        ("zsyh", "jsyh", "scatter"),
        ("zsyh", "szzs", "scatter"),
    )
    for x_name, y_name, kind in joint_specs:
        plt.show(sns.jointplot(x_name, y_name, pc_ret, kind=kind))
    plt.show(sns.pairplot(pc_ret[["jsyh","zsyh","pfyh","msyh"]].dropna()))

    print(pc_ret.std())
    make_end_line()
    rets = pc_ret.dropna()
    mean_ret = rets.mean()
    print(mean_ret)
    make_end_line()

    # Mean return on x, standard deviation on y.
    risk = rets.std()
    plt.scatter(mean_ret, risk)
    plt.xlabel("Expected Return")
    plt.ylabel("Risk")
    arrow_style = dict(arrowstyle = "-",connectionstyle = "arc3,rad=-0.3")
    for label, x, y in zip(rets.columns, mean_ret, risk):
        plt.annotate(
            label,
            xy=(x, y), xytext=(50, 50),
            textcoords="offset points", ha="right", va="bottom",
            arrowprops=arrow_style)
    plt.show()
def loadDataSet(filename):
    """Load sheet 1 of an Excel workbook into feature and target matrices.

    The sheet is read without a header (first row skipped), missing cells
    are replaced by 0, and the integer-labelled columns are transformed
    with the module's ``standard``, ``map_01``, ``map_rate``,
    ``map_sub_rate`` and ``transcoding`` helpers.

    Args:
        filename: path of the Excel workbook.

    Returns:
        tuple: ``(mat(data), mat(target).T)`` where ``data`` is columns
        3, 4, 6, 9, 11, 14, 15, 16, 17 followed by the encodings of
        columns 10 and 13, and ``target`` is column 2.
    """
    sheets = pd.read_excel(filename, sheetname=[1], header=None, skiprows=1)
    df = sheets[1].fillna(0)

    # Column 2 is rescaled by 1e8 before being standardised.
    df[2] = df[2]/100000000
    for col in (2, 3, 4, 6):
        df[col] = standard(df[col])
    for col in (9, 11):
        df[col] = df[col].apply(map_01)
    df[14] = standard(df[14])
    df[15] = standard(df[15].apply(map_rate))
    df[16] = standard(df[16].apply(map_sub_rate))
    df[17] = standard(df[17])

    # Only the encoded frames are used; the lookup dicts are discarded.
    prov_coding, _province_dict = transcoding(df[10])
    enter_coding, _enter_dict = transcoding(df[13])

    target = df[2]
    data = df[[3,4,6,9,11,14,15,16,17]]
    for encoded in (prov_coding, enter_coding):
        data = pd.concat([data, encoded], axis=1)

    # Imported for an exploratory pair plot that is currently disabled.
    import seaborn as sns
    return mat(data), mat(target).T
def loadDataSet(filename):
    """Load, clean and plot sheet 1 of an Excel workbook.

    Rows with missing values are dropped, column 2 is rescaled by 1e8 and
    used as the sort key, and the remaining integer-labelled columns are
    transformed with the module's ``standard``, ``map_01``, ``map_rate``,
    ``map_sub_rate`` and ``transcoding`` helpers. The group sizes of
    column 15 are printed before and after its transformation, and a
    regression pair plot of column 2 against the feature columns is drawn.

    Args:
        filename: path of the Excel workbook.

    Returns:
        tuple: ``(np.mat(data), np.mat(target).T)`` with ``data`` being
        columns 3, 4, 6, 9, 11, 14, 15, 16, 17 and ``target`` column 2.
    """
    sheet = 1
    raw = pd.read_excel(filename, sheetname=[sheet], header=None, skiprows=1)[sheet]

    # Row filtering; the threshold is derived from the unfiltered column count.
    df = raw.dropna(how='any', thresh=raw.shape[1]/2).dropna(how='any').fillna(0)

    df[2] = df[2]/100000000
    # reset_index() keeps the previous index as an extra 'index' column.
    df = df.sort_values(2).reset_index()

    for col in (3, 4):
        df[col] = standard(df[col])
    _rate_type, _rate_dict = transcoding(df[5])
    df[6] = standard(df[6])
    _platform, _platform_dict = transcoding(df[8])
    for col in (9, 11):
        df[col] = df[col].apply(map_01)
    _nature, _nature_dict = transcoding(df[12])
    df[14] = standard(df[14])

    # Group sizes of column 15 before and after it is mapped and standardised.
    print( df.groupby(15).size())
    df[15] = standard(df[15].apply(map_rate))
    print( df.groupby(15).size())

    df[16] = standard(df[16].apply(map_sub_rate))
    df[17] = standard(df[17])

    target = df[2]
    data = df[[3,4,6,9,11,14,15,16,17]]

    import seaborn as sns
    sns.pairplot(df, x_vars=[3,17,4,14,15,16,6,9,11], y_vars=2, size=5, aspect=0.8, kind='reg')
    return np.mat(data), np.mat(target).T
def plot_auxiliary(all_vars, filename, table_size=4):
    """Plot a group of variables either as 2-D trajectories or as a pair plot.

    Every variable is expected as ``(batch_size, sequence_length, dimension)``;
    2-D inputs get a leading batch axis of length one. The caller's list is
    left untouched.

    * If the last dimension is 2, a ``table_size`` x ``table_size`` grid is
      drawn. Panel ``k`` (row-major) shows batch element ``k`` of every
      variable as a line with a red dot on its first point. Panels beyond
      the smallest batch size stay empty. The figure is saved as PNG.
    * Otherwise all variables are flattened to ``(-1, dimension)`` rows,
      tagged with their position in ``all_vars`` as ``class`` and drawn as
      a seaborn pair plot saved to ``filename``.

    Args:
        all_vars: non-empty list of arrays sharing the same last dimension.
        filename: output path of the image.
        table_size: number of rows and columns of the trajectory grid.
    """
    # Normalise into a new list so the caller's list is not mutated.
    series = [np.expand_dims(a, 0) if a.ndim == 2 else a for a in all_vars]

    dim = series[0].shape[-1]
    if dim == 2:
        # squeeze=False keeps ``ax`` two-dimensional even for table_size == 1.
        f, ax = plt.subplots(table_size, table_size, sharex='col', sharey='row',
                             figsize=[12, 12], squeeze=False)
        n_batch = min(a.shape[0] for a in series)
        # ax.flat walks the grid row by row, one batch element per panel.
        for idx, panel in enumerate(ax.flat):
            if idx >= n_batch:
                break
            for a in series:
                panel.plot(a[idx, :, 0], a[idx, :, 1], linestyle='-', marker='o', markersize=3)
                # Mark the starting point of the trajectory.
                panel.plot(a[idx, 0, 0], a[idx, 0, 1], 'r.', ms=12)
        plt.savefig(filename, format='png', bbox_inches='tight', dpi=80)
        plt.close()
    else:
        frames = []
        for i, a in enumerate(series):
            df = pd.DataFrame(a.reshape(-1, dim))
            df['class'] = i
            frames.append(df)
        df_all = pd.concat(frames)
        sns_plot = sns.pairplot(df_all, hue="class", vars=list(range(dim)))
        sns_plot.savefig(filename)
        plt.close()
def visualize_hist_pairplot(X,y,selected_feature1,selected_feature2,features,diag_kind):
    """
    Plot the pairwise relationship between two features, coloured by class.

    Keyword arguments:
    X -- The feature vectors
    y -- The target vector
    selected_feature1 -- Name of the first feature to plot
    selected_feature2 -- Name of the second feature to plot
    features -- Column names for X stacked with y; the class column must be named "Y"
    diag_kind -- Type of plot in the diagonal (histogram or density function)

    The figure is handed to save_fig with a file name built from both
    feature names inside the "img" directory.
    """
    # Features and target side by side in a single frame.
    frame = pd.DataFrame(data=np.column_stack((X, y)), columns=features)

    # Class 0 and class 1 take the third and first colour of the hls palette.
    colours = sea.hls_palette()
    class_palette = {0: colours[2], 1: colours[0]}
    grid = sea.pairplot(
        frame,
        hue="Y",
        palette=class_palette,
        vars=[selected_feature1, selected_feature2],
        diag_kind=diag_kind,
    )
    title = 'Pairwise relationship: '+selected_feature1+" vs "+selected_feature2
    grid.fig.suptitle(title)
    grid.set(xticklabels=[])

    # Save the figure.
    output_dir = "img"
    image_path = '{}/{}_{}_hist_pairplot.png'.format(output_dir, selected_feature1, selected_feature2)
    save_fig(output_dir, image_path)
def plot_scatter(df, features, target, tag='eda', directory=None):
    r"""Plot a scatterplot matrix, also known as a pair plot.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe containing the features.
    features: list of str
        The features to compare in the scatterplot. The list is not
        modified.
    target : str
        The target variable for contrast.
    tag : str
        Unique identifier for the plot.
    directory : str, optional
        The full specification of the plot location.

    Returns
    -------
    None : None.

    References
    ----------
    https://seaborn.pydata.org/examples/scatterplot_matrix.html

    """
    logger.info("Generating Scatter Plot")

    # Select the columns through a copy: appending to ``features`` itself
    # would grow the caller's list on every call.
    columns = list(features)
    if target not in columns:
        columns.append(target)
    df = df[columns]

    # Generate the pair plot
    sns.set()
    sns_plot = sns.pairplot(df, hue=target)

    # Save the plot
    write_plot('seaborn', sns_plot, 'scatter_plot', tag, directory)
#
# Function plot_facet_grid
#
def comparison_1dim(by='Country', include_WEPP=True, include_VRE=False,
                    year=2015, how='hbar', figsize=(7,5)):
    """
    Compare the installed capacity of the gathered datasets, grouped by ``by``.

    Parameters
    ----------
    by : string, defines how to group data
        Allowed values: 'Country' or 'Fueltype'
    include_WEPP : bool
        Forwarded to ``gather_comparison_data``; also selects whether the
        WEPP-based datasets take part in the comparison.
    include_VRE : bool
        Forwarded to ``gather_comparison_data``.
    year : int
        Forwarded to ``gather_comparison_data``.
    how : string
        'hbar' draws a horizontal bar chart with capacity on the x-axis and
        ``by`` on the y-axis; 'scatter' draws a log-log seaborn pair plot.
    figsize : tuple
        (width, height) of the bar chart; for 'scatter' the height and the
        width/height ratio are passed to seaborn.

    Returns
    -------
    (fig, ax) for 'hbar', (g.fig, g.axes) for 'scatter', otherwise None.
    """
    red_w_wepp, red_wo_wepp, wepp, statistics = gather_comparison_data(
        include_WEPP=include_WEPP, include_VRE=include_VRE, year=year)

    if include_WEPP:
        frames = [red_w_wepp, red_wo_wepp, wepp, statistics]
        labels = ['Matched dataset w/ WEPP', 'Matched dataset w/o WEPP',
                  'WEPP only', 'Statistics OPSD']
    else:
        frames = [red_wo_wepp, statistics]
        labels = ['Matched dataset w/o WEPP', 'Statistics OPSD']
    # Scaled by 1/1000; the axis label below reports the result in GW.
    stats = lookup(frames, keys=labels, by=by)/1000

    if how == 'hbar':
        with sns.axes_style('darkgrid'):
            plt.rc('font', size=24)
            fig, ax = plt.subplots(figsize=figsize)
            stats.plot.barh(ax=ax, stacked=False, colormap='jet')
            ax.set_xlabel('Installed Capacity [GW]')
            ax.yaxis.label.set_visible(False)
            ax.set_axisbelow(True)  # keep the grid behind the bars
            ax.grid(color='white', linestyle='dotted')
            ax.legend(loc='best')
            ax.invert_yaxis()
        return fig, ax

    if how == 'scatter':
        # seaborn needs the grouping key as a regular column for ``hue``.
        stats.loc[:, by] = stats.index.astype(str)
        grid_options = dict(diag_kind='kde', hue=by, palette='Set2',
                            size=figsize[1], aspect=figsize[0]/figsize[1])
        # With fewer than three data columns only one pair is plotted.
        if len(stats.columns)-1 < 3:
            grid_options.update(x_vars=stats.columns[0], y_vars=stats.columns[1])
        g = sns.pairplot(stats, **grid_options)
        for axis in g.axes.flat:
            axis.set(xscale='log', yscale='log', xlim=(1,200), ylim=(1,200))
        return g.fig, g.axes