def _rawData(self):
    '''
    Select data from sites within site radius to be returned without stabilization.
    '''
    storeName = self.meta_data
    keyList = self._validStations(storeName)
    if len(keyList) == 0:
        self._validInit = 0
    else:
        storeData_fn = DataFetcher.getDataLocation('pbo')
        if storeData_fn is None:
            print('Dataset not available')
            return None
        storeData = pd.HDFStore(storeData_fn)
        mdyratio = self._mdyratio
        smSet_all, smHdr_all = pbo_util.nostab_sys(storeName, storeData,
                                                   [self._start_time, self._end_time],
                                                   indx=keyList, mdyratio=mdyratio,
                                                   use_progress_bar=self.use_progress_bar)
        self._smSet_all = smSet_all
        self._smHdr_all = smHdr_all
        storeData.close()
        if len(self._smSet_all) == 0:
            self._validInit = 0
        else:
            self._validInit = 1
def getAntennaLogs():
    '''
    Get antenna logs.
    @return dictionary mapping station name to a series of antenna-change dates
    '''
    meta_data = DataFetcher.getStationMetadata()
    storeData_fn = DataFetcher.getDataLocation('pbo')
    if storeData_fn is None:
        print('Dataset not available')
        return None
    store = pd.HDFStore(storeData_fn, 'r')
    logs = store['/antenna_logs']
    antenna_dict = dict()
    for label in meta_data.keys():
        station_logs = logs[logs['Station'] == label]
        if len(station_logs) > 0:
            antenna_dict[label] = station_logs['Date']
    store.close()
    return antenna_dict
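A quick usage sketch for getAntennaLogs (purely illustrative; it assumes the pbo dataset is available locally):

logs = getAntennaLogs()
if logs is not None:
    for station, dates in list(logs.items())[:3]:
        # Each value is a pandas Series of antenna-change dates for that station
        print(station, len(dates))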
def getStationMetadata():
    '''
    Get station metadata.
    @return data frame of station metadata
    '''
    store_location = data_util.getDataLocation('ngl_gps')
    store = pd.HDFStore(store_location, 'r')
    metadata = store['metadata']
    store.close()
    # Wrap longitudes from the [0, 360) convention into [-180, 180)
    metadata.loc[:, 'Lon'] = (metadata.loc[:, 'Lon'] + 180) % 360 - 180
    return metadata
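The last line above remaps longitudes from [0, 360) to [-180, 180). A minimal standalone check of that arithmetic (sample values are made up):

import pandas as pd

lons = pd.Series([0.0, 90.0, 180.0, 270.0, 359.5])
wrapped = (lons + 180) % 360 - 180
print(wrapped.tolist())  # [0.0, 90.0, -180.0, -90.0, -0.5]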
From two_sigma_financial_modelling.py (project: PortfolioTimeSeriesAnalysis, author: MizioAnd):
def save_dataframe(self, df):
    with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, 'train_debug',
                              self.timestamp, '.h5']), 'w') as train:
        train.put('train_debug', df)
def load_dataframe():
    dataframe_name = 'train_debug'
    # Timestamps of earlier saved frames (one-hot encoded and not one-hot encoded):
    # date_time = '20170613_19h09m40s'
    # date_time = '20170613_19h34m31s'
    # date_time = '20170614_00h07m32s'
    date_time = '20170619_11h47m22s'
    with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, dataframe_name,
                              date_time, '.h5']), 'r') as train:
        return train.get(dataframe_name)
def write_hdf(hdf_fname, df, key, header):
    """
    Output the contents of *df* and *header* to the HDF file
    *hdf_fname* under identifier *key*.
    """
    with PD.HDFStore(hdf_fname) as store:
        store.put(key, df)
        store.get_storer(key).attrs.header = header
    return hdf_fname
def read_hdf(hdf_fname, key):
    """
    Read contents of HDF file *hdf_fname* associated with *key* and
    return a :class:`DataFrame`, header tuple.
    """
    if not os.path.isfile(hdf_fname):
        raise ValueError('file {} does not exist'.format(hdf_fname))
    with PD.HDFStore(hdf_fname) as store:
        df = store.get(key)
        header = store.get_storer(key).attrs.header
    return df, header
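Since write_hdf and read_hdf are symmetric, a round-trip exercise shows both in one place (file name, key, and header tuple are arbitrary; PD is assumed to be the pandas alias used above):

import pandas as PD

df = PD.DataFrame({'a': [1, 2, 3]})
write_hdf('example.h5', df, 'my_key', header=('col_a',))
df2, header = read_hdf('example.h5', 'my_key')
assert df.equals(df2) and header == ('col_a',)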
def setUp(self):
    self.test_output_dir = mkdtemp(prefix="feagen_test_output_")
    pandas_hdf_path = join(self.test_output_dir, "pandas.h5")
    self.hdf_store = pd.HDFStore(pandas_hdf_path)
def __init__(self, hdf_path):
    hdf_dir = os.path.dirname(hdf_path)
    if hdf_dir != '':
        mkdir_p(hdf_dir)
    self.hdf_store = pd.HDFStore(hdf_path)
def _get_head_of_matrix(self):
    try:
        # Context manager closes the store even if the select fails
        with pandas.HDFStore(self.matrix_path) as hdf:
            key = hdf.keys()[0]
            head_of_matrix = hdf.select(key, start=0, stop=1)
        head_of_matrix.set_index(self.metadata['indices'], inplace=True)
        self._head_of_matrix = head_of_matrix
    except pandas.errors.EmptyDataError:  # the module is pandas.errors, not pandas.error
        self._head_of_matrix = None
def _write_hdf_to_buffer(self, df):
    # Build the HDF5 file entirely in memory (H5FD_CORE driver with the
    # backing store disabled) and return its serialized bytes; the image
    # must be captured while the store is still open.
    with pandas.HDFStore(
            "data.h5",
            mode="w",
            driver="H5FD_CORE",
            driver_core_backing_store=0) as out:
        out["matrix"] = df
        return out._handle.get_file_image()
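The bytes returned by _write_hdf_to_buffer are a complete HDF5 file image, so they can be written to disk or shipped over a network and then opened like any other .h5 file. A consumption sketch (the serializer instance and file name are hypothetical):

image = serializer._write_hdf_to_buffer(df)  # hypothetical instance
with open('matrix_copy.h5', 'wb') as fh:
    fh.write(image)  # matrix_copy.h5 is now a regular, openable HDF5 file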
def export_data_table(self, table, end_date, label, feature_names):
    """ Save a data set as an HDF table for later reuse.

    :param table: the DataFrame to save
    :type table: pandas DataFrame
    :param end_date: end of labeling period
    :type end_date: a date format of some kind
    :param label: name of the column containing labels
    :type label: str
    :param feature_names: names of the columns containing features
    :type feature_names: list
    :return: the prefix of the HDF filename
    :rtype: str
    """
    if isinstance(end_date, np.datetime64):
        end_date = np.datetime_as_string(end_date, timezone='local')[:10]
    else:
        # Timestamp.to_datetime() is long deprecated; to_pydatetime() is the
        # current equivalent
        end_date = end_date.to_pydatetime().date().isoformat()
    file_name = self.export_metadata(end_date, label, feature_names)
    file_path = '{0}/{1}.h5'.format(self.results_directory, file_name)
    if not os.path.exists(file_path):
        store = pd.HDFStore(file_path)
        store['df'] = table
        store.close()
    self.upload_file_to_s3('{0}.h5'.format(file_name), 'hdf_bucket_name',
                           file_path)
    print("uploaded hdf to s3")
    return file_name
def __init__(self, hdfstore, tablename):
    if isinstance(hdfstore, pd.HDFStore):
        self.store = hdfstore
    else:
        self.store = pd.HDFStore(hdfstore, "r")
    self.tablename = tablename
def __init__(self, store, tablename, vecnamecol, lengthcol):
    if isinstance(store, pd.HDFStore):
        self.store = store
    else:
        self.store = pd.HDFStore(store, "r")
    self.tablename = tablename
    self.vecnamecol = vecnamecol
    self.lengthcol = lengthcol
def __init__(self, storesdict):
    self.stores = {}
    for key in storesdict:
        if isinstance(storesdict[key], pd.HDFStore):
            self.stores[key] = storesdict[key]
        else:
            self.stores[key] = pd.HDFStore(storesdict[key], "r")
    self.indices = {}
def load_data(self, uid: str, prefix: str):
    """
    Load data from a specified group (prefix), gps or gravity, in the project's HDF5 store.

    :param str uid: Datafile Unique Identifier
    :param str prefix: Data type prefix [gps or gravity]
    :return: the stored DataFrame, or None if the key does not exist
    """
    with HDFStore(str(self.hdf_path)) as store:
        try:
            data = store.get('{}/{}'.format(prefix, uid))
        except KeyError:
            return None
        else:
            return data
def load(args, filename):
    # Returns an open HDFStore; the caller is responsible for closing it
    return pandas.HDFStore(filename)
def query_from_to(symbol, Start, today):
    store = pd.HDFStore(Constants.StockHDF)
    if symbol not in store:  # `in store` handles the leading '/' that store.keys() adds
        store.close()
        return None
    # The original snippet was truncated here ('store[]'); slicing the stored
    # frame to the requested date range is a plausible completion, not the
    # confirmed original behavior.
    df = store[symbol].loc[Start:today]
    store.close()
    return df
def get_taylor_table():
    store = pd.HDFStore(Constants.DatabaseTaylorCP)
    StoredDF = pd.DataFrame()
    for key in store.keys():
        DF = store[key].tail(1)
        DF['SymbolID'] = key
        StoredDF = pd.concat([StoredDF, DF[['SymbolID', 'MO', 'MLo', 'MHi', 'TaylorDay']]], axis=0)
    store.close()
    return StoredDF.to_html()
def calc_oz_series_pandas(symbol, numWeeksBack=20, averageTf='W'):
    timeFrameMap = {'W': (1 * numWeeksBack),
                    '3M': (numWeeksBack * 15),
                    'Q': (numWeeksBack * 15),
                    'M': (numWeeksBack * 4)}
    print(Constants.StockHDF)
    store = pd.HDFStore(Constants.StockHDF)
    symbolKey = symbol + '_' + averageTf
    today = datetime.datetime.now()  # - datetime.timedelta(days=1)
    day_of_week = today.weekday()
    weekStart = today - datetime.timedelta(days=day_of_week + 1)
    if symbolKey not in store:  # `in store` handles the leading '/' in HDF keys
        print('Symbol:' + symbol)
        weekly_DF = getWeeklyDF(timeFrameMap[averageTf], symbol)
        newDF = calc_OZ_pandas(weekly_DF, averageTf=averageTf)
        store[symbolKey] = newDF
        store.flush()
    # Recompute if the cached series does not end at the current week's start
    lenStore = len(store[symbolKey]) - 1
    if not (store[symbolKey].index[lenStore].date() == weekStart.date()):
        weekly_DF = getWeeklyDF(timeFrameMap[averageTf], symbol)
        newDF = calc_OZ_pandas(weekly_DF, averageTf=averageTf)
        store[symbolKey] = newDF
        store.flush()
    return store[symbolKey]
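calc_oz_series_pandas is essentially a compute-if-stale cache keyed on symbol and timeframe. The same pattern in miniature, independent of the project-specific helpers (all names here are illustrative):

import pandas as pd

def cached_frame(store_path, key, compute, is_stale):
    # Recompute and overwrite the cached frame only when it is missing or stale
    with pd.HDFStore(store_path) as store:
        if key not in store or is_stale(store[key]):
            store[key] = compute()
        return store[key]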