def minutes_for_days_in_range(self, start, end):
"""
Get all market minutes for the days between start and end, inclusive.
"""
start_date = self.normalize_date(start)
end_date = self.normalize_date(end)
all_minutes = []
for day in self.days_in_range(start_date, end_date):
day_minutes = self.market_minutes_for_day(day)
all_minutes.append(day_minutes)
# Concatenate all minutes and truncate minutes before start/after end.
return pd.DatetimeIndex(
np.concatenate(all_minutes), copy=False, tz='UTC',
)
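For orientation, a minimal standalone sketch of the concatenate-then-wrap pattern used above (the two sessions and their minute ranges are fabricated):
import numpy as np
import pandas as pd

# Two fabricated days of market minutes (14:31-21:00 UTC), mirroring what
# market_minutes_for_day would return for each session.
day1 = pd.date_range('2016-01-04 14:31', '2016-01-04 21:00', freq='T', tz='UTC')
day2 = pd.date_range('2016-01-05 14:31', '2016-01-05 21:00', freq='T', tz='UTC')
all_minutes = pd.DatetimeIndex(
    np.concatenate([day1.values, day2.values]), copy=False, tz='UTC',
)
print(all_minutes)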
Python usage examples of pd.DatetimeIndex(), collected from open-source projects.
def get_early_closes(start, end):
    # TSX closed at 1:00 PM on December 24th.
    start = canonicalize_datetime(start)
    end = canonicalize_datetime(end)
    early_close_rules = []
    # quarta_cinzas ("Ash Wednesday") is a dateutil rrule defined elsewhere
    # in the source module.
    early_close_rules.append(quarta_cinzas)
early_close_ruleset = rrule.rruleset()
for rule in early_close_rules:
early_close_ruleset.rrule(rule)
early_closes = early_close_ruleset.between(start, end, inc=True)
early_closes.sort()
return pd.DatetimeIndex(early_closes)
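A self-contained sketch of the same rruleset pattern, with a made-up yearly December 24th rule standing in for the module's real rules:
from datetime import datetime
from dateutil import rrule
import pandas as pd

dec_24 = rrule.rrule(rrule.YEARLY, bymonth=12, bymonthday=24,
                     dtstart=datetime(2000, 1, 1))
ruleset = rrule.rruleset()
ruleset.rrule(dec_24)  # union as many rules as there are early-close conventions
early_closes = ruleset.between(datetime(2000, 1, 1), datetime(2004, 12, 31), inc=True)
print(pd.DatetimeIndex(sorted(early_closes)))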
def getData():
    filename = "300333.csv"
    df = pd.read_csv(filename)
    print(df['date'].values)
    # Parsing each string via datetime.strptime in a list comprehension works
    # but is slow; pd.DatetimeIndex parses the whole column at once.
    pd_date = pd.DatetimeIndex(df['date'].values)
    df['date'] = pd_date
    print(df)
    new_df = df.set_index('date')
    print("*" * 20)
    print(new_df.index)
    print(new_df.iloc[0])
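The same result in one step; a minimal sketch assuming the CSV's 'date' column holds ISO-formatted dates:
import pandas as pd

# read_csv can parse the dates and set the index in a single call,
# equivalent to the manual pd.DatetimeIndex conversion above.
df = pd.read_csv("300333.csv", parse_dates=['date'], index_col='date')
print(df.index)  # DatetimeIndex([...], dtype='datetime64[ns]', name='date')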
def configure_timeindex(self):
    """
    Build the snapshot time index from the mapped TempResolution row.
    """
    try:
        ormclass = self._mapped['TempResolution']
        tr = self.session.query(ormclass).filter(
            ormclass.temp_id == self.temp_id).one()
    except (KeyError, NoResultFound):
        print('temp_id %s does not exist.' % self.temp_id)
        # without the TempResolution row, tr would be undefined below; re-raise
        raise
    # note: the start/periods/freq form of the DatetimeIndex constructor is
    # from older pandas (pd.date_range is the modern equivalent)
    timeindex = pd.DatetimeIndex(start=tr.start_time,
                                 periods=tr.timesteps,
                                 freq=tr.resolution)
    self.timeindex = timeindex[self.start_snapshot - 1: self.end_snapshot]
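Recent pandas removed the start/periods/freq form of the DatetimeIndex constructor; a hedged equivalent with pd.date_range (the start, length, and frequency here are invented):
import pandas as pd

timeindex = pd.date_range(start='2011-01-01 00:00', periods=8760, freq='H')
print(timeindex[:24])  # the first day's 24 hourly snapshots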
def _validated_tuples_to_dataframe(self, validated_tuples):
if validated_tuples == []:
dts, values, estimateds = [], [], []
else:
dts, values, estimateds = zip(*validated_tuples)
if self.parse_dates:
dts = [dateutil.parser.parse(dt) for dt in dts]
index = pd.DatetimeIndex(dts)
if index.shape[0] > 0:
index = index.tz_convert(pytz.UTC)
df = pd.DataFrame(
{"value": values, "estimated": estimateds},
index=index,
columns=["value", "estimated"],
)
df.value = df.value.astype(float)
df.estimated = df.estimated.astype(bool)
return df
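A quick usage sketch (the tuple layout — ISO timestamp string, value, estimated flag — is inferred from the zip above; offset-aware strings are what makes tz_convert, rather than tz_localize, the right call):
import pandas as pd

dts = ['2016-01-01T00:00:00+00:00', '2016-01-01T01:00:00+00:00']
index = pd.DatetimeIndex(dts).tz_convert('UTC')  # strings carry offsets, so the index is tz-aware
df = pd.DataFrame({'value': [1.0, 2.0], 'estimated': [False, True]},
                  index=index, columns=['value', 'estimated'])
print(df.dtypes)  # value: float64, estimated: bool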
def _create_daily_stats(self, perfs):
# create daily and cumulative stats dataframe
daily_perfs = []
# TODO: the loop here could overwrite expected properties
# of daily_perf. Could potentially raise or log a
# warning.
for perf in perfs:
if 'daily_perf' in perf:
perf['daily_perf'].update(
perf['daily_perf'].pop('recorded_vars')
)
perf['daily_perf'].update(perf['cumulative_risk_metrics'])
daily_perfs.append(perf['daily_perf'])
else:
self.risk_report = perf
daily_dts = pd.DatetimeIndex(
[p['period_close'] for p in daily_perfs], tz='UTC'
)
daily_stats = pd.DataFrame(daily_perfs, index=daily_dts)
return daily_stats
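For orientation, a fabricated single-entry perfs payload in the shape the loop expects (all key names taken from the code above):
import pandas as pd

perfs = [
    {'daily_perf': {'period_close': '2006-01-03 21:00:00',
                    'pnl': 12.5,
                    'recorded_vars': {'leverage': 0.1}},
     'cumulative_risk_metrics': {'sharpe': 1.2}},
]
daily = perfs[0]['daily_perf']
daily.update(daily.pop('recorded_vars'))           # flatten recorded_vars
daily.update(perfs[0]['cumulative_risk_metrics'])  # merge risk metrics
stats = pd.DataFrame([daily],
                     index=pd.DatetimeIndex([daily['period_close']], tz='UTC'))
print(stats.columns.tolist())  # ['period_close', 'pnl', 'leverage', 'sharpe']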
def minutes_for_sessions_in_range(self, start_session_label,
end_session_label):
"""
Returns all the minutes for all the sessions from the given start
session label to the given end session label, inclusive.
Parameters
----------
start_session_label: pd.Timestamp
The label of the first session in the range.
end_session_label: pd.Timestamp
The label of the last session in the range.
Returns
-------
pd.DatetimeIndex
The minutes in the desired range.
"""
first_minute, _ = self.open_and_close_for_session(start_session_label)
_, last_minute = self.open_and_close_for_session(end_session_label)
return self.minutes_in_range(first_minute, last_minute)
def _special_dates(self, calendars, ad_hoc_dates, start_date, end_date):
"""
Union an iterable of pairs of the form (time, calendar)
and an iterable of pairs of the form (time, [dates])
(This is shared logic for computing special opens and special closes.)
"""
_dates = DatetimeIndex([], tz='UTC').union_many(
[
holidays_at_time(calendar, start_date, end_date, time_,
self.tz)
for time_, calendar in calendars
] + [
days_at_time(datetimes, time_, self.tz)
for time_, datetimes in ad_hoc_dates
]
)
return _dates[(_dates >= start_date) & (_dates <= end_date)]
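union_many is a pandas-internal fast path for unioning many indexes at once; a hedged sketch of the plain union equivalent:
import pandas as pd

a = pd.DatetimeIndex(['2016-01-04 14:00'], tz='UTC')
b = pd.DatetimeIndex(['2016-01-05 14:00'], tz='UTC')
# start from an empty UTC index and union the pieces one by one
combined = pd.DatetimeIndex([], tz='UTC').union(a).union(b)
print(combined)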
def make_equity_daily_bar_data(cls):
days = cls.trading_calendar.sessions_in_range(
pd.Timestamp('2006-01-03', tz='UTC'),
pd.Timestamp('2006-01-09', tz='UTC')
)
return trades_by_sid_to_dfs(
{
0: factory.create_trade_history(
0,
np.arange(10.0, 10.0 + len(days), 1.0),
[10000] * len(days),
timedelta(days=1),
cls.sim_params,
cls.trading_calendar),
},
index=pd.DatetimeIndex(days),
)
def build_dataframe(self):
if not self.values.exists():
return pd.DataFrame()
# Am I really a programmer or just a lego assembler?
# Pandas makes my life at least 20 times easier.
df = pd.DataFrame.from_records(self.values, index=self.index_column)
# make the columns and labels prettier
if self.rename_columns:
df = df.rename(columns=self.column_mapping)
df.index.name = TIME_COLUMN_NAME
try:
df.index = df.index.tz_convert(self.user.pytz_timezone)
except AttributeError:
        # an AttributeError means the index is a plain Index, i.e. only
        # dates (and not times) were passed, so localize it instead
df.index = pd.DatetimeIndex(df.index, tz=self.user.pytz_timezone)
# cast it as numerics if possible, otherwise if we're dealing with strings, ignore
df = df.apply(pd.to_numeric, errors='ignore')
return df
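The try/except above hinges on the index type; a standalone sketch of both branches:
import pandas as pd

idx_aware = pd.DatetimeIndex(['2016-01-01 12:00'], tz='UTC')
print(idx_aware.tz_convert('US/Eastern'))  # tz-aware: convert works

idx_plain = pd.Index(['2016-01-01', '2016-01-02'])  # date-only strings
# idx_plain.tz_convert(...) raises AttributeError, hence the fallback:
print(pd.DatetimeIndex(idx_plain, tz='US/Eastern'))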
def round_timestamp_to_sleep_date(timeseries):
    """
    Not my proudest function ... this isn't as efficient as it could be, but
    I'm struggling with the pandas syntax to find the perfect one-liner.
    This could be much more performant; I just need time to sit down and
    figure it out.
    """
sleep_dates = []
for value in timeseries:
if value.hour < SLEEP_CUTOFF_TIME:
result = value - pd.DateOffset(days=1)
else:
result = value
sleep_dates.append(result)
index = pd.DatetimeIndex(sleep_dates)
return index
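The vectorized form the docstring wishes for might look like this; a sketch assuming naive (timezone-free) timestamps, since np.where drops timezone information, with the hypothetical cutoff_hour parameter standing in for SLEEP_CUTOFF_TIME:
import numpy as np
import pandas as pd

def round_timestamp_to_sleep_date_vectorized(timeseries, cutoff_hour=6):
    # shift every pre-cutoff timestamp back one day in a single operation
    shifted = timeseries - pd.Timedelta(days=1)
    return pd.DatetimeIndex(
        np.where(timeseries.hour < cutoff_hour, shifted, timeseries))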
Top_Trending.py — project: Trending-Places-in-OpenStreetMap, author: geometalab
def resample_missing_values(df, date, period):
    df.set_index('date', inplace=True)
    # For duplicate values at the same coordinates, the maximum is chosen rather than the average.
    df = (df.groupby(['lat', 'lon', 'countries'])).resample('D').max()
    df.reset_index(['lat', 'lon', 'countries'], drop=True, inplace=True)
    df['count'].fillna(0, inplace=True)
    # filling per group with fillna(..., inplace=True) does not stick (each
    # group is a copy), so apply ffill/bfill per group and assign back
    df = df.groupby(['lat', 'lon', 'countries'], group_keys=False).apply(
        lambda g: g.ffill().bfill())
    df.reset_index(inplace=True)
    # pd.date_range replaces the older DatetimeIndex(start=..., end=...) constructor
    idx = pd.date_range(start=date - dt.timedelta(days=(period - 1)), end=date, freq='D')
    new_df = pd.DataFrame()
    for index, group in df.groupby(['lat', 'lon', 'countries']):
        group = expand_date_range(group, idx)
        new_df = pd.concat([new_df, group])
    new_df.rename(columns={'index': 'date'}, inplace=True)
    return new_df
feature_engineering.py — project: smart-battery-for-smart-energy-usage, author: AnatolyPavlov
def transform(self, df):
    temp = pd.DatetimeIndex(df.index)
    df['weekday'] = temp.weekday
    df_weekdays = df[df['weekday'] <= 4].drop('weekday', axis=1)
    weekdays = extract_days(df_weekdays)
    df_weekends = df[df['weekday'] > 4].drop('weekday', axis=1)
    weekends = extract_days(df_weekends)
    print('weekdays: {}, weekends: {}'.format(len(weekdays), len(weekends)))
    print()
    part_of_week = self.environment_params['part_of_week'].values[0]
    if part_of_week == 'weekdays':
        print()
        print('Selected weekdays only')
        return df_weekdays
    elif part_of_week == 'weekends':
        print()
        print('Selected weekends only')
        return df_weekends
    else:
        print()
        print('Selected all days of week')
        return df.drop('weekday', axis=1)
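A tiny sketch of the weekday split (pandas numbers Monday–Sunday as 0–6, so <= 4 selects weekdays):
import pandas as pd

idx = pd.date_range('2016-01-01', periods=7, freq='D')  # 2016-01-01 is a Friday
weekday = pd.DatetimeIndex(idx).weekday
print(list(weekday))      # [4, 5, 6, 0, 1, 2, 3]
print(idx[weekday <= 4])  # weekdays only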
def __setattr__(self, key, value):
if key in ['data', 'keys', 'index']:
self.__dict__[key] = value
else:
if type(value) == Column:
if key in self.keys:
self.data[self.keys.index(key)] = value.values
self.index[self.keys.index(key), :] = value.index
else:
self.add_column(key, value.values, value.index)
elif type(value) == np.ndarray:
if key in self.keys:
self.data[self.keys.index(key)] = value
else:
self.add_column(key, value)
elif type(value) == pd.DatetimeIndex:
if key in self.keys:
self.data[self.keys.index(key)] = value.values
else:
self.add_column(key, value)
def plot(self, sort_csv_file, forecast_csv_file, save_fig_file):
sort_df = pd.read_csv(sort_csv_file)
sort_df['date'] = pd.to_datetime(sort_df['date'], format='%Y-%m-%d')
sort_df = sort_df.set_index(pd.DatetimeIndex(sort_df['date']))
forecast_df = pd.read_csv(forecast_csv_file, header=None,
names=['date', 'aver'])
forecast_df['date'] = pd.to_datetime(forecast_df['date'], format='%Y-%m-%d')
forecast_df = forecast_df.set_index(pd.DatetimeIndex(forecast_df['date']))
forecast_df['aver'].plot(figsize=(20, 20), c='r', linewidth=3.0)
ax = sort_df['aver'].plot(figsize=(20, 20), linewidth=3.0)
plt.ylabel('price')
plt.xlabel('date')
ax.set_ylim(sort_df['aver'].min() * 0.8, sort_df['aver'].max() * 1.2)
plt.savefig(save_fig_file)
plt.cla()
plt.clf()
plt.close()
def _hourly_range(self, init_date, time_frame):
"""
Returns a DatetimeIndex of the trading week(s) in hours.
"""
utcnow = datetime.utcnow()
tr_wk_str, tr_wk_end = self.get_trading_week(init_date)
if tr_wk_end > utcnow:
tr_wk_end = utcnow.replace(
            minute=0, second=0, microsecond=0)
freq, interval_type, delta = self._data_frequency(time_frame)
dth = pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq)
    while len(dth) % (300 * int(time_frame[1:])) != 0:
tr_wk_str = tr_wk_end + timedelta(**{interval_type: delta})
if tr_wk_str < utcnow:
tr_wk_str, tr_wk_end = self.get_trading_week(tr_wk_str)
if tr_wk_end > utcnow:
tr_wk_end = utcnow.replace(
                    minute=0, second=0, microsecond=0)
tr_wk_end += timedelta(hours=1)
dth = dth.append(
pd.date_range(str(tr_wk_str), str(tr_wk_end), freq=freq))
else:
break
return dth
def _daily_range(self, daily):
"""
Returns DatetimeIndex for daily values.
"""
utcnow = datetime.utcnow()
dtd = pd.DatetimeIndex([])
while daily < utcnow:
tr_wk_str, tr_wk_end = self.get_trading_week(daily)
        hour = tr_wk_str.hour
daily += timedelta(days=1)
daily = daily.replace(hour=hour)
if daily >= tr_wk_end:
daily, tr_wk_end = self.get_trading_week(daily)
dtd = dtd.append(
pd.date_range(str(daily), str(daily)))
return dtd
The test snippets below are from test_timeseries.py — project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia.
def test_to_datetime_tz_pytz(self):
# xref 8260
tm._skip_if_no_pytz()
import pytz
us_eastern = pytz.timezone('US/Eastern')
arr = np.array([us_eastern.localize(datetime(year=2000, month=1, day=1,
hour=3, minute=0)),
us_eastern.localize(datetime(year=2000, month=6, day=1,
hour=3, minute=0))],
dtype=object)
result = pd.to_datetime(arr, utc=True)
expected = DatetimeIndex(['2000-01-01 08:00:00+00:00',
'2000-06-01 07:00:00+00:00'],
dtype='datetime64[ns, UTC]', freq=None)
tm.assert_index_equal(result, expected)
def test_normalize(self):
rng = date_range('1/1/2000 9:30', periods=10, freq='D')
result = rng.normalize()
expected = date_range('1/1/2000', periods=10, freq='D')
self.assertTrue(result.equals(expected))
rng_ns = pd.DatetimeIndex(np.array([1380585623454345752,
1380585612343234312]).astype(
"datetime64[ns]"))
rng_ns_normalized = rng_ns.normalize()
expected = pd.DatetimeIndex(np.array([1380585600000000000,
1380585600000000000]).astype(
"datetime64[ns]"))
self.assertTrue(rng_ns_normalized.equals(expected))
self.assertTrue(result.is_normalized)
self.assertFalse(rng.is_normalized)
def test_append_concat(self):
rng = date_range('5/8/2012 1:45', periods=10, freq='5T')
ts = Series(np.random.randn(len(rng)), rng)
df = DataFrame(np.random.randn(len(rng), 4), index=rng)
result = ts.append(ts)
result_df = df.append(df)
ex_index = DatetimeIndex(np.tile(rng.values, 2))
self.assertTrue(result.index.equals(ex_index))
self.assertTrue(result_df.index.equals(ex_index))
appended = rng.append(rng)
self.assertTrue(appended.equals(ex_index))
appended = rng.append([rng, rng])
ex_index = DatetimeIndex(np.tile(rng.values, 3))
self.assertTrue(appended.equals(ex_index))
# different index names
rng1 = rng.copy()
rng2 = rng.copy()
rng1.name = 'foo'
rng2.name = 'bar'
self.assertEqual(rng1.append(rng1).name, 'foo')
self.assertIsNone(rng1.append(rng2).name)
def test_sort_values(self):
idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02'])
ordered = idx.sort_values()
self.assertTrue(ordered.is_monotonic)
ordered = idx.sort_values(ascending=False)
self.assertTrue(ordered[::-1].is_monotonic)
ordered, dexer = idx.sort_values(return_indexer=True)
self.assertTrue(ordered.is_monotonic)
self.assert_numpy_array_equal(dexer, [1, 2, 0])
ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
self.assertTrue(ordered[::-1].is_monotonic)
self.assert_numpy_array_equal(dexer, [0, 2, 1])
def test_take(self):
dates = [datetime(2010, 1, 1, 14), datetime(2010, 1, 1, 15),
datetime(2010, 1, 1, 17), datetime(2010, 1, 1, 21)]
for tz in [None, 'US/Eastern', 'Asia/Tokyo']:
idx = DatetimeIndex(start='2010-01-01 09:00',
end='2010-02-01 09:00', freq='H', tz=tz,
name='idx')
expected = DatetimeIndex(dates, freq=None, name='idx', tz=tz)
taken1 = idx.take([5, 6, 8, 12])
taken2 = idx[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
self.assertTrue(taken.equals(expected))
tm.assertIsInstance(taken, DatetimeIndex)
self.assertIsNone(taken.freq)
self.assertEqual(taken.tz, expected.tz)
self.assertEqual(taken.name, expected.name)
def test_dayfirst(self):
# GH 5917
arr = ['10/02/2014', '11/02/2014', '12/02/2014']
expected = DatetimeIndex([datetime(2014, 2, 10), datetime(2014, 2, 11),
datetime(2014, 2, 12)])
idx1 = DatetimeIndex(arr, dayfirst=True)
idx2 = DatetimeIndex(np.array(arr), dayfirst=True)
idx3 = to_datetime(arr, dayfirst=True)
idx4 = to_datetime(np.array(arr), dayfirst=True)
idx5 = DatetimeIndex(Index(arr), dayfirst=True)
idx6 = DatetimeIndex(Series(arr), dayfirst=True)
self.assertTrue(expected.equals(idx1))
self.assertTrue(expected.equals(idx2))
self.assertTrue(expected.equals(idx3))
self.assertTrue(expected.equals(idx4))
self.assertTrue(expected.equals(idx5))
self.assertTrue(expected.equals(idx6))
def test_slice_year(self):
dti = DatetimeIndex(freq='B', start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(dti)), index=dti)
result = s['2005']
expected = s[s.index.year == 2005]
assert_series_equal(result, expected)
df = DataFrame(np.random.rand(len(dti), 5), index=dti)
result = df.ix['2005']
expected = df[df.index.year == 2005]
assert_frame_equal(result, expected)
rng = date_range('1/1/2000', '1/1/2010')
result = rng.get_loc('2009')
expected = slice(3288, 3653)
self.assertEqual(result, expected)
def test_partial_slice(self):
rng = DatetimeIndex(freq='D', start=datetime(2005, 1, 1), periods=500)
s = Series(np.arange(len(rng)), index=rng)
result = s['2005-05':'2006-02']
expected = s['20050501':'20060228']
assert_series_equal(result, expected)
result = s['2005-05':]
expected = s['20050501':]
assert_series_equal(result, expected)
result = s[:'2006-02']
expected = s[:'20060228']
assert_series_equal(result, expected)
result = s['2005-1-1']
self.assertEqual(result, s.iloc[0])
self.assertRaises(Exception, s.__getitem__, '2004-12-31')
def test_timedelta(self):
# this is valid too
index = date_range('1/1/2000', periods=50, freq='B')
shifted = index + timedelta(1)
back = shifted + timedelta(-1)
self.assertTrue(tm.equalContents(index, back))
self.assertEqual(shifted.freq, index.freq)
self.assertEqual(shifted.freq, back.freq)
result = index - timedelta(1)
expected = index + timedelta(-1)
self.assertTrue(result.equals(expected))
# GH4134, buggy with timedeltas
rng = date_range('2013', '2014')
s = Series(rng)
result1 = rng - pd.offsets.Hour(1)
result2 = DatetimeIndex(s - np.timedelta64(100000000))
result3 = rng - np.timedelta64(100000000)
result4 = DatetimeIndex(s - pd.offsets.Hour(1))
self.assertTrue(result1.equals(result4))
self.assertTrue(result2.equals(result3))
def to_data_frame(self):
index = pd.DatetimeIndex([self.timestamp])
df = pd.DataFrame(self.to_dict(), index=index, columns=self.headers())
df.index = df['timestamp']
    df.drop('timestamp', axis=1, inplace=True)
return df
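The column-to-index shuffle can be avoided by naming the index up front; a minimal sketch with a fabricated record:
import pandas as pd

record = {'timestamp': '2016-01-04 09:30:00', 'price': 10.0, 'volume': 100}
# name the DatetimeIndex directly instead of copying a column into it
index = pd.DatetimeIndex([record.pop('timestamp')], name='timestamp')
df = pd.DataFrame(record, index=index)
print(df)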
def frame_to_series(self, field, frame, columns=None):
"""
Convert a frame with a DatetimeIndex and sid columns into a series with
a sid index, using the aggregator defined by the given field.
"""
if isinstance(frame, pd.DataFrame):
columns = frame.columns
frame = frame.values
if not len(frame):
return pd.Series(
data=(0 if field == 'volume' else np.nan),
index=columns,
).values
if field in ['price', 'close']:
# shortcircuit for full last row
vals = frame[-1]
if np.all(~np.isnan(vals)):
return vals
return ffill(frame)[-1]
elif field == 'open':
return bfill(frame)[0]
elif field == 'volume':
return np.nansum(frame, axis=0)
elif field == 'high':
return np.nanmax(frame, axis=0)
elif field == 'low':
return np.nanmin(frame, axis=0)
else:
raise ValueError("Unknown field {}".format(field))
def fast_append_date_to_index(index, timestamp):
"""
Append a timestamp to a DatetimeIndex. DatetimeIndex.append does not
appear to work.
"""
return pd.DatetimeIndex(
np.hstack(
[
index.values,
[timestamp.asm8],
]
),
tz='UTC',
)
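A usage sketch, assuming a tz-aware UTC index and Timestamp (.asm8 is the datetime64 value that np.hstack needs):
import pandas as pd

idx = pd.DatetimeIndex(['2016-01-04', '2016-01-05'], tz='UTC')
ts = pd.Timestamp('2016-01-06', tz='UTC')
print(fast_append_date_to_index(idx, ts))
# DatetimeIndex(['2016-01-04', '2016-01-05', '2016-01-06'], ..., tz='UTC')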