# analyticsutils.py - Python source code snippet

def get_savings(data, column_debit='Debit', column_credit='Credit', dt_start=None, dt_end=None, aggregation_period='M'):
    """Aggregate checking account data per period and compute the savings.

    The debit and credit columns are summed for every aggregation period and
    the savings are computed as ``credit - debit`` for each period.

    Args:
        data (dataframe): The pandas dataframe containing at least a debit and a credit column.
            Its index must be a ``pandas.DatetimeIndex``.
        column_debit (str): The column name for the debit column.
        column_credit (str): The column name for the credit column.
        dt_start (str): The start date (a specific day like '2012-11-11' or a month like '2012-11')
            from where the savings should be calculated. ``None`` means no lower bound.
        dt_end (str): The end date (a specific day like '2012-11-11' or a month like '2012-11')
            up to where the savings should be calculated. ``None`` means no upper bound.
        aggregation_period (str): Pandas offset alias like 'M' for month specifying over which period
            the savings are aggregated. A full specification can be found here:
            https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases
            NOTE: recent pandas releases prefer 'ME' over 'M' for month-end; pass the alias
            that matches the installed pandas version.

    Returns:
        A new pandas data frame indexed by aggregation period, holding the summed debit and
        credit columns plus an additional 'Savings' column. Periods without any entries that
        lie between the first and the last selected entry are kept with all values set to 0.
        An empty data frame is returned (and an error is logged) if ``data`` is not indexed
        by a ``DatetimeIndex``.

    """
    if not isinstance(data.index, pd.DatetimeIndex):
        logging.getLogger().error("A pandas datetimeindex is required for the given dataframe")
        return pd.DataFrame()

    # Label based slicing with date strings is only well defined on a sorted
    # index; sorting does not change the sums computed below.
    if not data.index.is_monotonic_increasing:
        data = data.sort_index()

    # Selecting rows and columns in a single .loc call yields a new frame, so
    # the caller's data is never modified.
    aggregated = data.loc[dt_start:dt_end, [column_debit, column_credit]]

    # pd.TimeGrouper has been removed from pandas; pd.Grouper(freq=...) is the
    # supported way to group a DatetimeIndex by period.
    aggregated = aggregated.groupby(pd.Grouper(freq=aggregation_period)).sum()
    # Guard against pandas versions that report NaN for empty periods.
    aggregated = aggregated.fillna(0)

    aggregated['Savings'] = aggregated[column_credit] - aggregated[column_debit]
    return aggregated