def munros(edge_length=10):
    """Build a 2D histogram of the "munros" example dataset.

    The dataset is fetched with ``load_dataset("munros")`` and its ``lat``
    and ``long`` columns are binned with the "fixed_width" strategy.

    Parameters
    ----------
    edge_length : float
        Size of the rectangular grid in minutes; it is divided by 60
        before being passed on as the bin width.

    Returns
    -------
    h : physt.histogram_nd.Histogram2D
        Histogram in latitude and longitude, as produced by ``h2``.
    """
    frame = load_dataset("munros")
    latitude, longitude = frame["lat"], frame["long"]
    # Minutes -> units of the lat/long columns (1/60 per minute).
    bin_width = edge_length / 60
    return h2(
        latitude,
        longitude,
        "fixed_width",
        bin_width,
        name="munros",
        title="Munros of Scotland",
    )
# Python: example source code for load_dataset()
def load_dataset(name):
"""Load example dataset.
If seaborn is present, its datasets can be loaded.
Physt also includes some datasets in CSV format.
Parameters
----------
name : str
Returns
-------
dataset : pandas.DataFrame
"""
# Our custom datasets:
try:
try:
import pandas as pd
except ImportError:
raise RuntimeError("Pandas not installed.")
import pkgutil
import io
binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name))
return pd.read_csv(io.BytesIO(binary_data))
except FileNotFoundError:
pass
# Seaborn datasets?
try:
import seaborn as sns
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if name in sns.get_dataset_names():
return sns.load_dataset(name)
except ImportError:
pass
# Fall through
raise RuntimeError("Dataset {0} not available.".format(name))
def iris_h1(x="sepal_length"):
    """Histogram a single column of the iris dataset.

    The dataset is fetched with ``load_dataset("iris")`` and the selected
    column is passed to ``h1`` with the "human" binning and 20 as the
    bin-count argument.

    Parameters
    ----------
    x : str
        Name of the property to be histogrammed
        (sepal_length, sepal_width, petal_length, petal_width)

    Returns
    -------
    The one-dimensional histogram returned by ``h1``, named "iris".
    """
    column = load_dataset("iris")[x]
    return h1(column, "human", 20, name="iris")
def iris_h2(x="sepal_length", y="sepal_width"):
    """Histogram a pair of columns of the iris dataset.

    The dataset is fetched with ``load_dataset("iris")`` and the two
    selected columns are passed to ``h2`` with the "human" binning and 20
    as the bin-count argument.

    Parameters
    ----------
    x, y : str
        Names of the properties to be histogrammed
        (sepal_length, sepal_width, petal_length, petal_width)

    Returns
    -------
    The two-dimensional histogram returned by ``h2``, named "iris".
    """
    frame = load_dataset("iris")
    first_axis, second_axis = frame[x], frame[y]
    return h2(first_axis, second_axis, "human", 20, name="iris")
def testcase1():
    """Draw a regression joint plot of tip against total bill.

    Loads seaborn's "tips" example dataset and plots the ``tip`` column
    against ``total_bill`` with ``kind='reg'``.
    """
    tips = sns.load_dataset('tips')
    # The dataset's column is named 'tip' (singular), not 'tips'. Arguments
    # are passed by keyword because newer seaborn releases accept only
    # `data` positionally, while older ones accept these keywords as well.
    sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
def titanic_1():
    """Print several survival summaries of seaborn's "titanic" dataset.

    Demonstrates ``groupby`` and ``pivot_table`` on the same data: survival
    rate by sex, by sex and class, by sex/age-band and class, a mixed
    aggregation (sum of survivors, mean fare) and a table with margins.
    The expected output of each step is shown in the comment below it.

    Every ``print`` call takes exactly one argument, so the function is
    valid on both Python 2 and Python 3.
    """
    titanic = sns.load_dataset('titanic')
    print(titanic.head())
    #    survived  pclass     sex   age ......
    # 0         0       3    male  22.0
    # 1         1       1  female  38.0
    # 2         1       3  female  26.0
    # 3         1       1  female  35.0
    # 4         0       3    male  35.0

    # Survival rate by sex.
    print(titanic.groupby('sex')[['survived']].mean())
    #         survived
    # sex
    # female  0.742038
    # male    0.188908

    # Survival rate by sex and class, via groupby + unstack ...
    print(titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack())
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # ... and the same table via pivot_table.
    print(titanic.pivot_table('survived', index='sex', columns='class'))
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Add an age band (0-18, 18-80) as a second index level.
    age = pd.cut(titanic['age'], [0, 18, 80])
    print(titanic.pivot_table('survived', ['sex', age], 'class'))
    # class               First    Second     Third
    # sex    age
    # female (0, 18]   0.909091  1.000000  0.511628
    #        (18, 80]  0.972973  0.900000  0.423729
    # male   (0, 18]   0.800000  0.600000  0.215686
    #        (18, 80]  0.375000  0.071429  0.133663

    # Different aggregation per column. The string 'sum' is used instead of
    # the builtin `sum`, which recent pandas versions warn about.
    print(titanic.pivot_table(index='sex', columns='class',
                              aggfunc={'survived': 'sum', 'fare': 'mean'}))

    # margins=True appends the "All" row and column.
    print(titanic.pivot_table('survived', index='sex', columns='class', margins=True))
    # class      First    Second     Third       All
    # sex
    # female  0.968085  0.921053  0.500000  0.742038
    # male    0.368852  0.157407  0.135447  0.188908
    # All     0.629630  0.472826  0.242363  0.383838