def aggregate_ohlcv_panel(self,
                          fields,
                          ohlcv_panel,
                          items=None,
                          minor_axis=None):
    """
    Collapse an OHLCV Panel into DataFrame-ready rows: each requested
    field's 2-D frame is reduced to a Series via ``self.frame_to_series``.

    ``ohlcv_panel`` may be a ``pd.Panel`` (in which case its axes are
    used and ``items``/``minor_axis`` are ignored) or a raw 3-D array
    accompanied by explicit ``items`` and ``minor_axis`` labels.
    """
    if isinstance(ohlcv_panel, pd.Panel):
        values = ohlcv_panel.values
        items = ohlcv_panel.items
        minor_axis = ohlcv_panel.minor_axis
    else:
        values = ohlcv_panel
    aggregated = []
    for field in fields:
        # Locate the field's 2-D slab by label, then reduce it.
        frame = values[items.get_loc(field)]
        aggregated.append(self.frame_to_series(field, frame, minor_axis))
    return np.array(aggregated)
# Collected example usages of pandas Panel() (scraped snippet compilation)
def test_nan_filter_panel(self):
    """NaN on the very first bar drops the event; a later NaN is
    forward-filled and the event survives."""
    sessions = pd.date_range('1/1/2000', periods=2, freq='B', tz='UTC')
    panel = pd.Panel(np.random.randn(2, 2, 2),
                     major_axis=sessions,
                     items=[4, 5],
                     minor_axis=['price', 'volume'])
    # First bar has nothing to ffill from, so sid 4's event is filtered.
    panel.loc[4, sessions[0], 'price'] = np.nan
    # Later bar should be forward-filled rather than filtered.
    panel.loc[5, sessions[1], 'price'] = np.nan
    source = DataPanelSource(panel)
    self.assertEqual(5, next(source).sid)
    self.assertEqual(4, next(source).sid)
    self.assertRaises(StopIteration, next, source)
def setUp(self):
    """Build two single-asset fixtures: one whose last bar is a
    CLOSE_POSITION event, and one made purely of TRADE events."""
    self.env = TradingEnvironment()
    self.days = self.env.trading_days[:5]
    trade = DATASOURCE_TYPE.TRADE
    self.panel = pd.Panel({
        1: pd.DataFrame(
            {'price': [1, 1, 2, 4, 8],
             'volume': [1e9, 1e9, 1e9, 1e9, 0],
             'type': [trade, trade, trade, trade,
                      DATASOURCE_TYPE.CLOSE_POSITION]},
            index=self.days,
        ),
    })
    self.no_close_panel = pd.Panel({
        1: pd.DataFrame(
            {'price': [1, 1, 2, 4, 8],
             'volume': [1e9] * 5,
             'type': [trade] * 5},
            index=self.days,
        ),
    })
def request_prices(self, current_date, symbols):
    """Implementation of abstract base class method.

    Issues one Interactive Brokers snapshot request per symbol and
    returns the price panel, which the IB callbacks fill in
    asynchronously.
    """
    # Fresh empty panel: single price field x one date x the symbols.
    self.bar = pd.Panel(
        items=[PriceFields.current_price.value],
        major_axis=[current_date],
        minor_axis=symbols,
    )
    # Ticker id is simply the symbol's position in the request list.
    for ticker_id, symbol in enumerate(symbols):
        contract = self.create_contract(symbol)
        self.conn.reqMktData(ticker_id, contract, "", True)
    # Give the asynchronous callbacks a moment to populate the panel.
    sleep(0.5)
    return self.bar
def init_class_fixtures(cls):
    """Build a deterministic OHLCV panel covering every known sid and
    wrap it in a PanelBarReader at the class's bar frequency."""
    super(WithPanelBarReader, cls).init_class_fixtures()
    finder = cls.asset_finder
    trading_calendar = get_calendar('NYSE')
    items = finder.retrieve_all(finder.sids)
    # Daily readers index by session; minute readers by minute.
    if cls.FREQUENCY == 'daily':
        major_axis = trading_calendar.sessions_in_range(
            cls.START_DATE, cls.END_DATE)
    else:
        major_axis = trading_calendar.minutes_for_sessions_in_range(
            cls.START_DATE, cls.END_DATE)
    minor_axis = ['open', 'high', 'low', 'close', 'volume']
    shape = (len(items), len(major_axis), len(minor_axis))
    # Sequential integers make every cell's expected value predictable.
    raw_data = np.arange(shape[0] * shape[1] * shape[2]).reshape(shape)
    cls.panel = pd.Panel(
        raw_data,
        items=items,
        major_axis=major_axis,
        minor_axis=minor_axis,
    )
    cls.reader = PanelBarReader(trading_calendar, cls.panel, cls.FREQUENCY)
def test_duplicate_values(self):
    """PanelBarReader must reject duplicate axis labels regardless of
    which axis the duplicates land on after transposition."""
    FILL_VALUE = 57  # the panel's contents are irrelevant to this test
    panel = pd.Panel(
        FILL_VALUE,
        items=['a', 'b', 'b', 'a'],
        major_axis=['c'],
        minor_axis=['d'],
    )
    unused = ExplodingObject()
    axis_names = ['items', 'major_axis', 'minor_axis']
    for axis_order in permutations((0, 1, 2)):
        transposed = panel.transpose(*axis_order)
        with self.assertRaises(ValueError) as e:
            PanelBarReader(unused, transposed, 'daily')
        # After transposing, the duplicated labels sit on whichever axis
        # received position 0 (the original items axis).
        duplicated_axis = axis_names[axis_order.index(0)]
        expected = (
            "Duplicate entries in Panel.{name}: ['a', 'b'].".format(
                name=duplicated_axis,
            )
        )
        self.assertEqual(str(e.exception), expected)
# Source: generic.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (35 reads, 0 favorites, 0 likes, 0 comments)
def swaplevel(self, i, j, axis=0):
    """
    Swap levels i and j in a MultiIndex on a particular axis

    Parameters
    ----------
    i, j : int, string (can be mixed)
        Level of index to be swapped. Can pass level name as string.

    Returns
    -------
    swapped : type of caller (new object)
    """
    axis_number = self._get_axis_number(axis)
    swapped = self.copy()
    # Mutate the copy's internal BlockManager axis directly: the data
    # blocks are untouched, only the index object is replaced.
    current_labels = swapped._data.axes[axis_number]
    swapped._data.set_axis(axis_number, current_labels.swaplevel(i, j))
    return swapped
# ----------------------------------------------------------------------
# Rename
# TODO: define separate funcs for DataFrame, Series and Panel so you can
# get completion on keyword arguments.
# Source: generic.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (26 reads, 0 favorites, 0 likes, 0 comments)
def get(self, key, default=None):
    """
    Get item from object for given key (DataFrame column, Panel slice,
    etc.). Returns default value if not found.

    Parameters
    ----------
    key : object

    Returns
    -------
    value : type of items contained in object
    """
    # EAFP: attempt the lookup, translating every indexing failure mode
    # into the caller-supplied default.
    try:
        value = self[key]
    except (KeyError, ValueError, IndexError):
        value = default
    return value
# Source: test_resample.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (26 reads, 0 favorites, 0 likes, 0 comments)
def test_resample_panel(self):
    """Resampling a Panel along an axis must equal resampling each
    item's DataFrame independently."""
    rng = date_range('1/1/2000', '6/30/2000')
    panel = Panel(np.random.randn(3, len(rng), 5),
                  items=['one', 'two', 'three'],
                  major_axis=rng,
                  minor_axis=['a', 'b', 'c', 'd', 'e'])

    def itemwise(pnl, func):
        # Apply ``func`` to each item's frame and reassemble a Panel.
        return Panel({item: func(pnl[item]) for item in pnl.items},
                     items=pnl.items)

    result = panel.resample('M', axis=1).mean()
    expected = itemwise(panel, lambda frame: frame.resample('M').mean())
    tm.assert_panel_equal(result, expected)

    # Same check with time on the minor axis (axis=2).
    swapped = panel.swapaxes(1, 2)
    result = swapped.resample('M', axis=2).mean()
    expected = itemwise(swapped,
                        lambda frame: frame.resample('M', axis=1).mean())
    tm.assert_panel_equal(result, expected)
# Source: test_resample.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (29 reads, 0 favorites, 0 likes, 0 comments)
def test_resample_panel_numpy(self):
    """``resample(...).apply`` with an explicit mean lambda must match
    the built-in ``.mean()`` aggregation."""
    rng = date_range('1/1/2000', '6/30/2000')
    panel = Panel(np.random.randn(3, len(rng), 5),
                  items=['one', 'two', 'three'],
                  major_axis=rng,
                  minor_axis=['a', 'b', 'c', 'd', 'e'])

    result = panel.resample('M', axis=1).apply(lambda chunk: chunk.mean(1))
    expected = panel.resample('M', axis=1).mean()
    tm.assert_panel_equal(result, expected)

    # Repeat with time moved onto the minor axis.
    panel = panel.swapaxes(1, 2)
    result = panel.resample('M', axis=2).apply(lambda chunk: chunk.mean(2))
    expected = panel.resample('M', axis=2).mean()
    tm.assert_panel_equal(result, expected)
# Source: test_resample.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (31 reads, 0 favorites, 0 likes, 0 comments)
def test_panel_aggregation(self):
    """Aggregating through an explicit TimeGrouper-derived grouper must
    match the groupby object's built-in mean."""
    ind = pd.date_range('1/1/2000', periods=100)
    wp = pd.Panel(np.random.randn(2, len(ind), 4),
                  items=['Item1', 'Item2'],
                  major_axis=ind,
                  minor_axis=['A', 'B', 'C', 'D'])
    tg = TimeGrouper('M', axis=1)
    _, grouper, _ = tg._get_grouper(wp)
    grouped = wp.groupby(grouper)
    expected = grouped.mean()

    def monthly_mean(chunk):
        # Each chunk handed to agg must itself be a Panel slice.
        assert (isinstance(chunk, Panel))
        return chunk.mean(1)

    tm.assert_panel_equal(grouped.agg(monthly_mean), expected)
# Source: test_base.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (29 reads, 0 favorites, 0 likes, 0 comments)
def test_binary_ops_docs(self):
    """Every arithmetic method's docstring must show its operator in
    both the forward ('series + other') and the reflected
    ('other + series') form, for Series, DataFrame and Panel."""
    from pandas import DataFrame, Panel
    op_map = {'add': '+',
              'sub': '-',
              'mul': '*',
              'mod': '%',
              'pow': '**',
              'truediv': '/',
              'floordiv': '//'}
    for op_name, op_symbol in op_map.items():
        for klass in [Series, DataFrame, Panel]:
            operand1 = klass.__name__.lower()
            operand2 = 'other'
            forward = ' '.join([operand1, op_symbol, operand2])
            self.assertTrue(forward in getattr(klass, op_name).__doc__)
            # Reflected variants (radd, rsub, ...) swap the operands.
            reflected = ' '.join([operand2, op_symbol, operand1])
            self.assertTrue(
                reflected in getattr(klass, 'r' + op_name).__doc__)
# Source: test_subclass.py — project PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author SignalMedia (28 reads, 0 favorites, 0 likes, 0 comments)
def test_to_panel_expanddim(self):
    """``DataFrame.to_panel`` on a subclass must honour
    ``_constructor_expanddim`` and return the subclass Panel (GH 9762)."""

    class SubclassedPanel(Panel):
        pass

    class SubclassedFrame(DataFrame):
        @property
        def _constructor_expanddim(self):
            return SubclassedPanel

    index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)])
    df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index)
    result = df.to_panel()
    self.assertTrue(isinstance(result, SubclassedPanel))
    expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]],
                               items=['X', 'Y'], major_axis=[0],
                               minor_axis=[0, 1, 2],
                               dtype='int64')
    tm.assert_panel_equal(result, expected)
def getmtm(cf, alpha=0.95):
    """
    Compute the MtM (i.e. the mean realised cash-flows together with a
    confidence interval) for each asset listed in `cf`.

    Parameters
    ----------
    cf : pandas.Panel
        Realised cash-flows for each simulation (`items`), each asset
        (`major_axis`) and each date (`minor_axis`).
    alpha : double between 0. and 1.
        Quantile used when computing the confidence intervals.
        NOTE(review): not referenced in this body — presumably consumed
        downstream by ``mkmcresults``; confirm.
    """
    nsims = cf.shape[0]
    # Total cash-flow per (simulation, asset): sum across dates.
    cumvalues = cf.sum(axis=2)
    mean = cumvalues.mean(axis=1)
    std = cumvalues.std(axis=1)
    return {key: mkmcresults(val, std[key], nsims)
            for key, val in mean.items()}
def backtest(config_file, day_trade):
    """Run the configured strategy over every data file, write the
    per-file results to Excel, and optionally plot cumulative profit."""
    cfg = config.Config(config_file)
    cfg.day_trade = day_trade
    frames = load_data(config_file)
    trender = strategies[cfg.strategy](**cfg.strategy_parameters)
    results = [trender.backtest(data_frame=frame) for frame in frames]
    # One panel item per input file, keyed by the file's basename.
    final_panel = pd.Panel({
        os.path.basename(path_cfg['path']): frame
        for path_cfg, frame in zip(cfg.data_path, results)
    })
    profit_series = final_panel.sum(axis=0)['total_profit'].cumsum()
    final_panel.to_excel(cfg.output_file)
    if cfg.show:
        profit_series.plot()
        plt.xlabel('Time')
        plt.ylabel('Profit')
        plt.legend('Profit')
        plt.show()
def trade_summary_all(self):
    """Aggregate the per-instrument trade summaries into one DataFrame
    of portfolio-level statistics.

    NOTE(review): the original column names were non-ASCII (apparently
    Chinese) and have been mojibake'd into runs of '?'.  Several
    previously-distinct keys now collide (e.g. the two entries of the
    max-loop below are the identical string ``u"??????"``, and repeated
    ``dct[u"??????"] = ...`` assignments overwrite each other), so this
    routine almost certainly no longer computes what the author
    intended.  Recover the original encoding before trusting it.
    """
    dct = OrderedDict()
    # trade_summary: per-instrument frames; swap so fields become items.
    panel = pd.Panel(self.trade_summary).swapaxes(0, 1)
    # Sum every garbled ('?'-prefixed) field across instruments.
    for field in panel.keys():
        if field.startswith(u"?"):
            dct[field] = panel[field].apply(np.sum, axis=0)
    # These two fields take the max across instruments instead.
    for field in [u"??????", u"??????"]:
        dct[field] = panel[field].apply(np.max, axis=0)
    # Derived ratios; exact semantics unrecoverable from garbled keys.
    dct[u"??????"] = dct[u"???"] / dct[u"?????"]
    dct[u"??????"] = dct[u"???"] / dct[u"?????"]
    dct[u"????????"] = dct[u"?????"] / dct[u"?????"]
    dct[u"????????"] = dct[u"?????"] / dct[u"?????"]
    dct[u"??????"] = (dct[u"?????"] / dct[u"?????"]).astype(str)
    dct[u"?????"] = dct[u"?????"].astype(str)
    dct[u"??"] = dct[u"?????"] / dct[u"?????"]
    orders = self.order_details
    # First/last order timestamps bracket the backtest period.
    start = orders["??????"].iloc[0]
    end = orders["??????"].iloc[-1]
    dct[u"????"] = [_workdays(start, end), np.nan, np.nan]
    result = pd.DataFrame(data=dct).T
    return result
def history(self, symbol=None, frequency=None, fields=None, start=None, end=None, length=None, db=None):
    """Fetch historical bars, trying the in-memory panel cache first and
    falling back to the database on a miss.

    Parameters
    ----------
    symbol : str or list, optional
        Instrument(s); defaults to every item cached for `frequency`.
    frequency : optional
        Bar frequency; defaults to ``self.frequency``.
    fields, start, end, length : optional
        Passed through to the panel / database readers.
    db : optional
        Database handle; defaults to ``self._db``.

    Returns
    -------
    pd.DataFrame, pd.Series or pd.Panel
        A Panel whose minor axis has a single entry is squeezed to a
        DataFrame.
    """
    if frequency is None:
        frequency = self.frequency
    try:
        if symbol is None:
            symbol = list(self._panels[frequency].items)
        result = self._read_panel(symbol, frequency, fields, start, end, length)
        if self.match(result, symbol, length):
            return result
        # Cached data incomplete: force the database fallback below.
        raise KeyError()
    except KeyError:
        if symbol is None:
            # BUG FIX: was ``self._panels[self.frequency].items()`` —
            # Panel.items is an axis Index (an attribute, not a
            # method), so calling it raised TypeError.  Mirror the
            # attribute access used on the cache path above.
            symbol = list(self._panels[self.frequency].items)
        if end is None:
            end = self.time
        result = self._read_db(symbol, frequency, fields, start, end, length, db if db else self._db)
        if isinstance(result, pd.Panel) and len(result.minor_axis) == 1:
            return result.iloc[:, :, 0]
        else:
            return result
def match(result, items, length):
    """Return True when `result` looks complete for the request: the
    right number of rows (and, for a Panel, the right number of items)
    for `length`.  A falsy `length` means no requirement and always
    matches; unknown result types never match a concrete length."""
    if not length:
        return True
    if isinstance(result, (pd.DataFrame, pd.Series)):
        if len(result) == length:
            return True
        return False
    if isinstance(result, pd.Panel):
        if (len(items) == len(result.items)
                and len(result.major_axis) == length):
            return True
        return False
    return False
def __init__(self, panel, context=None, side="L", frequency='D'):
    """
    Create a PanelDataSupport from a pandas.Panel object.

    The panel's inner data can be accessed through history() and
    current().

    Args:
        panel(pandas.Panel): Panel where the real data is stored
        context: optional; the default end bar number comes from
            context.real_bar_num
        side(str): "L" or "R" — whether a bar's datetime refers to its
            start time ("L") or its end time ("R")
        frequency: bar frequency label (default 'D')
    """
    super(PanelDataSupport, self).__init__()
    self._panel = panel
    self._frequency = frequency
    self._side = side
    self._context = context
    self._others = {}
    # Datetime index shared by all items, read off the first frame.
    self._date_index = self._panel.iloc[0].index
def reshape(data):
    """Squeeze singleton axes: a one-row or one-column DataFrame becomes
    a Series, a Panel with a singleton major or minor axis becomes a
    DataFrame; anything else is returned unchanged."""
    if isinstance(data, pd.DataFrame):
        if len(data) == 1:
            return data.iloc[0]
        if len(data.columns) == 1:
            return data.iloc[:, 0]
        return data
    if isinstance(data, pd.Panel):
        if len(data.major_axis) == 1:
            return data.iloc[:, 0, :]
        if len(data.minor_axis) == 1:
            return data.iloc[:, :, 0]
        return data
    return data
def getHisdatPanl(codes, days):
    """Fetch daily K-line history for several codes as a panel.

    codes: [list] instrument codes
    days: [tuple] (start_day, end_day)
    return: [pandas.Panel] one item per code

    Results are cached through ``agl.SerialMgr``; the panel is rebuilt
    only when no serialized copy exists.
    """
    def gen():
        start_day, end_day = days
        return pd.Panel({code: getHisdatDf(code, start_day, end_day)
                         for code in codes})

    panel = agl.SerialMgr.serialAuto(gen)
    if panel is None:
        # Cache miss: build the panel directly.
        panel = gen()
    return panel
def save_panel(self):
    """
    Take all supplied data and create the final pandas Panel.

    ``self.data_dict`` may be a dict keyed by time, a dict keyed by
    entity, or a raw 3-D ndarray; in every case the resulting panel is
    oriented with entities on the items axis.

    :return: pandas Panel
    """
    assert 0 not in self.dimensions
    assert self.data_dict != {}
    if self.dict_key == 'time':
        assert len(self.data_dict) == self.dimensions[1]
        # BUG FIX: pd.Panel has no ``index`` keyword — the first axis is
        # named ``items`` (the old ``index=...`` call raised TypeError).
        panel = pd.Panel(
            self.data_dict,
            items=self.time_series,
            major_axis=self.entities,
            minor_axis=self.variables,
        ).transpose(1, 0, 2)  # put entities into items
    elif self.dict_key == 'entity':
        assert len(self.data_dict) == self.dimensions[0]
        panel = pd.Panel(self.data_dict, items=self.entities,
                         major_axis=self.time_series,
                         minor_axis=self.variables)
    else:
        # Not a dict but a 3D np array, already (entity, time, variable).
        panel = pd.Panel(self.data_dict, items=self.entities,
                         major_axis=self.time_series,
                         minor_axis=self.variables)
    print(panel)
    self.panel = panel
    return panel
def test_numpy_3d():
    """A raw (nvar, nobs, nentity) ndarray should round-trip through
    PanelData with generated axis labels."""
    n, t, k = 11, 7, 3
    x = np.random.random((k, t, n))
    dh = PanelData(x)
    assert_equal(x, dh.values3d)
    assert dh.nentity == n
    assert dh.nobs == t
    assert dh.nvar == k
    # The 2-D layout stacks each entity's time series vertically.
    assert_equal(np.reshape(x.T, (n * t, k)), dh.values2d)
    entities = ['entity.{0}'.format(i) for i in range(n)]
    times = list(range(t))
    variables = ['x.{0}'.format(i) for i in range(k)]
    expected = pd.Panel(np.reshape(x, (k, t, n)), items=variables,
                        major_axis=times, minor_axis=entities)
    expected_frame = expected.swapaxes(1, 2).to_frame()
    expected_frame.index.levels[0].name = 'entity'
    expected_frame.index.levels[1].name = 'time'
    assert_frame_equal(dh.dataframe, expected_frame)
def test_categorical_conversion():
    """A categorical column converted to dummies should drop the first
    category ('a') and one-hot encode the remaining two."""
    t, n = 3, 1000
    string = np.random.choice(['a', 'b', 'c'], (t, n))
    num = np.random.randn(t, n)
    p = pd.Panel({'a': string, 'b': num})
    p = p[['a', 'b']]
    panel = PanelData(p, convert_dummies=False)
    df = panel.dataframe.copy()
    df['a'] = pd.Categorical(df['a'])
    panel = PanelData(df, convert_dummies=True)
    df = panel.dataframe
    assert df.shape == (3000, 3)

    flat = string.T.ravel()
    # 'a' is the omitted base category: both dummies are zero there.
    for label, dummy in (('b', 'a.b'), ('c', 'a.c')):
        col = df.loc[:, dummy].values
        assert np.all(col[np.where(flat == label)] == 1.0)
        for other in 'abc'.replace(label, ''):
            assert np.all(col[np.where(flat == other)] == 0.0)
def test_string_conversion():
    """A string column should be dummy-encoded with the first category
    ('a') as the omitted base."""
    t, n = 3, 1000
    string = np.random.choice(['a', 'b', 'c'], (t, n))
    num = np.random.randn(t, n)
    p = pd.Panel({'a': string, 'b': num})
    p = p[['a', 'b']]
    panel = PanelData(p, var_name='OtherEffect')
    df = panel.dataframe
    assert df.shape == (3000, 3)

    flat = string.T.ravel()
    # Each dummy is 1.0 exactly where its own label occurred.
    for label, dummy in (('b', 'a.b'), ('c', 'a.c')):
        col = df.loc[:, dummy].values
        assert np.all(col[np.where(flat == label)] == 1.0)
        for other in 'abc'.replace(label, ''):
            assert np.all(col[np.where(flat == other)] == 0.0)
def test_incorrect_time_axis():
    """PanelData must reject a time axis that is not datetime-like, both
    in Panel form and in stacked-DataFrame form."""
    x = np.random.randn(3, 3, 1000)
    entities = ['entity.{0}'.format(i) for i in range(1000)]
    var_names = ['var.{0}'.format(i) for i in range(3)]

    def check(time_labels):
        # Both the raw panel and its multi-indexed frame view must fail.
        p = pd.Panel(x, items=var_names, major_axis=time_labels,
                     minor_axis=entities)
        with pytest.raises(ValueError):
            PanelData(p)
        df = p.swapaxes(1, 2).swapaxes(0, 1).to_frame()
        with pytest.raises(ValueError):
            PanelData(df)

    # Plain string labels are invalid...
    check(['time.{0}'.format(i) for i in range(3)])
    # ...and so is a mixed-type axis.
    check([1, pd.datetime(1960, 1, 1), 'a'])
def test_first_difference_errors(data):
    """FirstDifferenceOLS must reject a single-period sample and models
    containing a constant term."""
    # Slice the fixture down to a single time period, handling both the
    # Panel-backed and raw-ndarray flavours of the fixture.
    if isinstance(data.x, pd.Panel):
        x = data.x.iloc[:, [0], :]
        y = data.y.iloc[[0], :]
    else:
        x = data.x[:, [0], :]
        y = data.y[[0], :]
    # One period means differencing is impossible.
    with pytest.raises(ValueError):
        FirstDifferenceOLS(y, x)
    # The constant-term check only applies to the Panel fixture.
    if not isinstance(data.x, pd.Panel):
        return
    x = data.x.copy()
    x['Intercept'] = 1.0
    # Constants are eliminated by differencing, so they are disallowed.
    with pytest.raises(ValueError):
        FirstDifferenceOLS(data.y, x)
def first_difference(self):
    """
    Compute first differences of variables

    Returns
    -------
    diffs : PanelData
        Differenced values
    """
    values = self.panel.values
    # Difference along the time (major) axis; the first period is lost.
    delta = values[:, 1:] - values[:, :-1]
    diff_panel = Panel(delta, items=self.panel.items,
                       major_axis=self.panel.major_axis[1:],
                       minor_axis=self.panel.minor_axis)
    # Stack entity-major, realign to the original index, and drop rows
    # containing any missing value.
    frame = diff_panel.swapaxes(1, 2).to_frame(filter_observations=False)
    frame = frame.reindex(self._frame.index).dropna(how='any')
    return PanelData(frame)
def mass_contaminant_consumed(node_results):
    """ Mass of contaminant consumed, equation from [1].

    Parameters
    ----------
    node_results : pd.Panel
        A pandas Panel containing node results.
        Items axis = attributes, Major axis = times, Minor axis = node names
        Mass of contaminant consumed uses 'demand' and 'quality' attributes.

    References
    ----------
    [1] EPA, U. S. (2015). Water security toolkit user manual version 1.3.
    Technical report, U.S. Environmental Protection Agency
    """
    demand = node_results['demand']
    quality = node_results['quality']
    # Only node/time cells with positive demand actually consume water.
    positive_demand = np.greater(demand, 0)
    # Second index entry as the timestep: assumes a zero-based index
    # with constant spacing.
    deltaT = quality.index[1]
    # m3/s * s * kg/m3 -> kg
    return demand * deltaT * quality * positive_demand
def volume_contaminant_consumed(node_results, detection_limit):
    """ Volume of contaminant consumed, equation from [1].

    Parameters
    ----------
    node_results : pd.Panel
        A pandas Panel containing node results.
        Items axis = attributes, Major axis = times, Minor axis = node names
        Volume of contaminant consumed uses 'demand' and 'quality' attributes.
    detection_limit : float
        Contaminant detection limit

    References
    ----------
    [1] EPA, U. S. (2015). Water security toolkit user manual version 1.3.
    Technical report, U.S. Environmental Protection Agency
    """
    demand = node_results['demand']
    quality = node_results['quality']
    # Count a cell only when contamination is detectable AND water is
    # actually being drawn.
    above_limit = np.greater(quality, detection_limit)
    positive_demand = np.greater(demand, 0)
    # Second index entry as the timestep: assumes a zero-based index
    # with constant spacing.
    deltaT = quality.index[1]
    # m3/s * s * bool -> m3
    return demand * deltaT * above_limit * positive_demand