Python 函数 load_dataset() 的实例源码

__init__.py 文件源码 项目:physt 作者: janpipek 项目源码 文件源码 阅读 21 收藏 0 点赞 0 评论 0
def munros(edge_length=10):
    """Histogram the munros of Scotland on a fixed-width lat/long grid.

    Parameters
    ----------
    edge_length : float
        Size of one grid cell in minutes; it is divided by 60 before
        being passed on as the bin width.

    Returns
    -------
    h : physt.histogram_nd.Histogram2D
        Histogram in latitude and longitude.
    """
    munro_table = load_dataset("munros")
    latitudes = munro_table["lat"]
    longitudes = munro_table["long"]
    bin_width = edge_length / 60
    return h2(
        latitudes,
        longitudes,
        "fixed_width",
        bin_width,
        name="munros",
        title="Munros of Scotland"
    )
__init__.py 文件源码 项目:physt 作者: janpipek 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def load_dataset(name):
    """Load example dataset.

    If seaborn is present, its datasets can be loaded.
    Physt also includes some datasets in CSV format.

    Parameters
    ----------
    name : str

    Returns
    -------
    dataset : pandas.DataFrame
    """
    # Our custom datasets:
    try:
        try:
            import pandas as pd
        except ImportError:
            raise RuntimeError("Pandas not installed.")
        import pkgutil
        import io
        binary_data = pkgutil.get_data('physt', 'examples/{0}.csv'.format(name))
        return pd.read_csv(io.BytesIO(binary_data))
    except FileNotFoundError:
        pass

    # Seaborn datasets?
    try:
        import seaborn as sns
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            if name in sns.get_dataset_names():
                    return sns.load_dataset(name)
    except ImportError:
        pass

    # Fall through
    raise RuntimeError("Dataset {0} not available.".format(name))
__init__.py 文件源码 项目:physt 作者: janpipek 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def iris_h1(x="sepal_length"):
    """Build a 1D histogram of one column of the iris dataset.

    Parameters
    ----------
    x : str
        Name of the property to be histogrammed
        (sepal_length, sepal_width, petal_length, petal_width)
    """
    iris_table = load_dataset("iris")
    values = iris_table[x]
    return h1(values, "human", 20, name="iris")
__init__.py 文件源码 项目:physt 作者: janpipek 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def iris_h2(x="sepal_length", y="sepal_width"):
    """Build a 2D histogram of two columns of the iris dataset.

    Parameters
    ----------
    x, y : str
        Names of the properties to be histogrammed
        (sepal_length, sepal_width, petal_length, petal_width)
    """
    iris_table = load_dataset("iris")
    x_values = iris_table[x]
    y_values = iris_table[y]
    return h2(x_values, y_values, "human", 20, name="iris")
seaborn_usage.py 文件源码 项目:base_function 作者: Rockyzsu 项目源码 文件源码 阅读 18 收藏 0 点赞 0 评论 0
def testcase1():
    """Draw a regression joint plot of tip against total bill.

    Loads seaborn's ``tips`` example dataset and plots the ``tip`` column
    against ``total_bill`` with ``kind='reg'``.
    """
    tips = sns.load_dataset('tips')
    # The dataset's column is 'tip' (singular); 'tips' is not a column.
    # x/y/data are passed by keyword because recent seaborn releases no
    # longer accept them positionally; keywords work on old releases too.
    sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
pd_pivot.py 文件源码 项目:python_utils 作者: Jayhello 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def titanic_1():
    """Walk through groupby / pivot_table summaries of the titanic dataset.

    Loads seaborn's ``titanic`` example dataset and prints, in order: the
    head of the frame, survival rate by sex, survival rate by sex and class
    (via groupby and via pivot_table), survival rate by sex, age band and
    class, a mixed sum/mean pivot, and a pivot with margins.

    ``print`` is called with a single parenthesised argument so the function
    parses and prints identically on Python 2 and Python 3.
    """
    titanic = sns.load_dataset('titanic')
    print(titanic.head())
    #    survived  pclass     sex   age  ......
    #           0       0    male    22
    # 1         1       1  female  38.0
    # 2         1       3  female  26.0
    # 3         1       1  female  35.0
    # 4         0       3    male  35.0

    print(titanic.groupby('sex')[['survived']].mean())
    #         survived
    # sex
    # female  0.742038
    # male    0.188908

    print(titanic.groupby(['sex', 'class'])['survived'].aggregate('mean').unstack())
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    print(titanic.pivot_table('survived', index='sex', columns='class'))
    # class      First    Second     Third
    # sex
    # female  0.968085  0.921053  0.500000
    # male    0.368852  0.157407  0.135447

    # Split ages into two bands: (0, 18] and (18, 80].
    age = pd.cut(titanic['age'], [0, 18, 80])
    print(titanic.pivot_table('survived', ['sex', age], 'class'))
    # class               First    Second     Third
    # sex    age
    # female (0, 18]   0.909091  1.000000  0.511628
    #        (18, 80]  0.972973  0.900000  0.423729
    # male   (0, 18]   0.800000  0.600000  0.215686
    #        (18, 80]  0.375000  0.071429  0.133663

    # 'sum' is given by name: it is the same aggregation as the builtin,
    # without the deprecation warning recent pandas emits for the callable.
    print(titanic.pivot_table(index='sex', columns='class',
                              aggfunc={'survived': 'sum', 'fare': 'mean'}))

    print(titanic.pivot_table('survived', index='sex', columns='class', margins=True))
    # class      First    Second     Third       All
    # sex
    # female  0.968085  0.921053  0.500000  0.742038
    # male    0.368852  0.157407  0.135447  0.188908
    # All     0.629630  0.472826  0.242363  0.383838


问题


面经


文章

微信
公众号

扫码关注公众号