python类pivot_table()的实例源码

test_pivot.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def test_pivot_dtaccessor(self):
    """Pivot on keys derived through the ``.dt`` accessor (GH 8103).

    Exercises ``pivot_table`` with ``index``/``columns`` given as a column
    label, as Series produced by ``.dt``, as a plain ndarray, and as a list
    of Series.
    """
    # Two labels ('a', 'b'), three rows each: dt1 cycles through three
    # hours of one day, dt2 holds one timestamp per label.
    dates1 = ['2011-07-19 07:00:00', '2011-07-19 08:00:00',
              '2011-07-19 09:00:00'] * 2
    dates2 = ['2013-01-01 15:00:00'] * 3 + ['2013-02-01 15:00:00'] * 3
    df = DataFrame({'label': ['a'] * 3 + ['b'] * 3,
                    'dt1': dates1, 'dt2': dates2,
                    'value1': np.arange(6, dtype='int64'),
                    'value2': [1, 2] * 3})
    for name in ('dt1', 'dt2'):
        df[name] = df[name].apply(pd.Timestamp)

    # Grouping keys shared by the cases below.
    hours = df['dt1'].dt.hour
    months = df['dt2'].dt.month

    # Pieces shared by the first two expected frames.
    by_hour = {7: [0, 3], 8: [1, 4], 9: [2, 5]}
    hour_cols = Index([7, 8, 9], name='dt1')

    # Case 1: column label as index, .dt-derived Series as columns.
    result = pivot_table(df, index='label', columns=hours,
                         values='value1')
    expected = DataFrame(by_hour,
                         index=Index(['a', 'b'], name='label'),
                         columns=hour_cols)
    tm.assert_frame_equal(result, expected)

    # Case 2: .dt-derived Series for both index and columns.
    result = pivot_table(df, index=months, columns=hours,
                         values='value1')
    expected = DataFrame(by_hour,
                         index=Index([1, 2], name='dt2'),
                         columns=hour_cols)
    tm.assert_frame_equal(result, expected)

    # Case 3: ndarray index, list of Series as columns.
    result = pivot_table(df, index=df['dt2'].dt.year.values,
                         columns=[hours, months],
                         values='value1')
    exp_col = MultiIndex.from_arrays(
        [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=['dt1', 'dt2'])
    expected = DataFrame(np.array([[0, 3, 1, 4, 2, 5]], dtype='int64'),
                         index=[2013], columns=exp_col)
    tm.assert_frame_equal(result, expected)

    # Case 4: ad-hoc ndarray of group keys; cells with no data are NaN.
    groups = np.array(['X', 'X', 'X', 'X', 'Y', 'Y'])
    result = pivot_table(df, index=groups,
                         columns=[hours, months],
                         values='value1')
    nan = np.nan
    expected = DataFrame(np.array([[0, 3, 1, nan, 2, nan],
                                   [nan, nan, nan, 4, nan, 5]]),
                         index=['X', 'Y'], columns=exp_col)
    tm.assert_frame_equal(result, expected)
graphics.py 文件源码 项目:pecos 作者: sandialabs 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def plot_doy_heatmap(data, cmap='nipy_spectral', vmin=None, vmax=None,
                     overlay=None, title=None, figsize=(7.0, 3.0)):
    """
    Create a day-of-year (X-axis) vs. time-of-day (Y-axis) heatmap.

    The heatmap is drawn on a newly created figure.  Nothing is returned
    and the input ``data`` is left unmodified.

    Parameters
    ----------
    data : pandas DataFrame or pandas Series
        Data (single column), indexed by time.  When a DataFrame is
        given, its first column is plotted.

    cmap : string (optional)
        Colormap, default = nipy_spectral

    vmin : float (optional)
        Colormap minimum, default = None (autoscale)

    vmax : float (optional)
        Colormap maximum, default = None (autoscale)

    overlay : pandas DataFrame (optional)
        Data to overlay on the heatmap.
        Time index should be in day-of-year (X-axis)
        Values should be in time-of-day in minutes (Y-axis)

    title : string (optional)
        Title, default = None

    figsize : tuple (optional)
        Figure size, default = (7.0, 3.0)
    """
    # Work on a private frame: the helper columns added below must not
    # leak into the caller's object.
    if isinstance(data, pd.Series):
        data = data.to_frame()
    else:
        data = data.copy()

    # Convert data to a pivot table
    col_name = data.columns[0]
    timestamps = data.index
    data['X'] = timestamps.dayofyear
    # Time of day expressed in (fractional) minutes.
    data['Y'] = (timestamps.hour * 60
                 + timestamps.minute
                 + timestamps.second / 60.0
                 + timestamps.microsecond / (60 * 1000000.0))
    piv = pd.pivot_table(data, values=col_name, index=['Y'], columns=['X'],
                         fill_value=np.nan)

    # Create the heatmap.  plt.subplots() already creates the figure, so no
    # separate plt.figure() call is needed (it would leave an empty figure
    # open).
    fig, ax = plt.subplots(figsize=figsize)
    # extent is (left, right, bottom, top); the larger Y bound is given as
    # "bottom", so time of day increases downward on the plot.
    im = ax.imshow(piv, cmap=cmap, aspect='auto', vmin=vmin, vmax=vmax,
                   extent=[data['X'].min() - 0.5, data['X'].max() + 0.5,
                           data['Y'].max() - 0.5, data['Y'].min() + 0.5])
    fig.colorbar(im, ax=ax)

    # Add overlay
    if isinstance(overlay, pd.DataFrame):
        overlay.plot(ax=ax)

    # Add title and labels
    if title:
        ax.set_title(title)
    ax.set_xlabel("Day of the year")
    ax.set_ylabel("Time of day (minutes)")
    fig.tight_layout()


问题


面经


文章

微信
公众号

扫码关注公众号