def get_savings(data, column_debit='Debit', column_credit='Credit', dt_start=None, dt_end=None, aggregation_period='M'):
    """Aggregate checking-account data and return the savings per period.

    Args:
        data (dataframe): The pandas dataframe containing at least a debit and
            a credit column. It must be indexed by a ``pd.DatetimeIndex``.
        column_debit (str): The column name for the debit column.
        column_credit (str): The column name for the credit column.
        dt_start (str): The start date (a specific day like '2012-11-11' or a
            month like '2012-11') from where the savings should be calculated.
            ``None`` means "from the first entry".
        dt_end (str): The end date (a specific day like '2012-11-11' or a
            month like '2012-11') up to where the savings should be
            calculated (inclusive). ``None`` means "up to the last entry".
        aggregation_period (str): Offset alias such as 'M' for month,
            specifying over which period the savings are aggregated. The full
            list of aliases is documented at
            https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
            NOTE(review): recent pandas releases deprecate 'M' in favour of
            'ME'; the default is kept for backward compatibility -- confirm
            against the pandas version in use.

    Returns:
        A new pandas data frame with one row per aggregation interval between
        the first and last selected entry, holding the summed debit and credit
        columns plus an additional 'Savings' column (credit minus debit).
        Intervals without any entries are reported as 0. If ``data`` is not
        indexed by a ``pd.DatetimeIndex`` an error is logged and an empty data
        frame is returned. The input frame is never modified.
    """
    if not isinstance(data.index, pd.DatetimeIndex):
        logging.getLogger().error("A pandas datetimeindex is required for the given dataframe")
        return pd.DataFrame()

    # Selecting the two columns with a list yields a new frame, so the caller's
    # data stays untouched. Sorting first makes label-based date slicing safe
    # even when the transactions are not stored in chronological order.
    selected = data[[column_debit, column_credit]].sort_index().loc[dt_start:dt_end]

    # pd.TimeGrouper was removed from pandas; pd.Grouper(freq=...) is the
    # supported way to bucket a DatetimeIndex into fixed periods.
    aggregated = selected.groupby(pd.Grouper(freq=aggregation_period)).sum()

    # Guard against NaN sums for empty periods on pandas versions that
    # produce them, so 'Savings' is always a number.
    aggregated = aggregated.fillna(0)
    aggregated['Savings'] = aggregated[column_credit] - aggregated[column_debit]
    return aggregated
评论列表
文章目录