import pandas
import yaml

def get_matrix_and_metadata(matrix_path, metadata_path):
    """Retrieve a matrix in HDF format and
    metadata about the matrix in YAML format.

    Returns: (tuple) matrix, metadata
    """
    matrix = pandas.read_hdf(matrix_path)
    with open(metadata_path) as f:
        # safe_load avoids executing arbitrary YAML tags; a bare
        # yaml.load(f) without a Loader is deprecated in PyYAML >= 5.1
        metadata = yaml.safe_load(f)
    return matrix, metadata
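A minimal usage sketch; the file paths and the 'description' key are hypothetical, and the YAML file is assumed to hold a mapping:

# Hypothetical paths for illustration only.
matrix, metadata = get_matrix_and_metadata('matrix.h5', 'metadata.yaml')
print(matrix.shape, metadata.get('description'))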
Python read_hdf() example source code
def read_smiles_data(filename):
    import pandas as pd
    # The HDF5 file is expected to hold a 'table' key with a
    # 'structure' column containing the SMILES strings.
    h5f = pd.read_hdf(filename, 'table')
    data = h5f['structure'][:]
    # import gzip
    # data = [line.split()[0].strip() for line in gzip.open(filename) if line]
    return data
baseline.py (project: DREAM_invivo_tf_binding_prediction_challenge_baseline, author: nboley)
def __init__(self,
             labels_fname,
             regions_fname=None,
             max_n_rows=None,
             load_cached=True):
    self.labels_fname = labels_fname
    self.regions_fname = regions_fname
    self.max_n_rows = max_n_rows
    self._hash = None
    self.load_cached = load_cached
    # extract the sample names from the header
    #assert labels_fname.endswith("labels.tsv.gz"), \
    #    "Unrecognized labels filename '%s'" % labels_fname
    self._init_header_data(labels_fname)
    # extract the factor from the filename
    self.factor = os.path.basename(labels_fname).split('.')[0]
    # if we want to use a cached version...
    if self.load_cached is True:
        try:
            print("Loading '%s'" % self.cached_fname)
            # explicit append mode; older h5py versions defaulted to 'a'
            self.h5store = h5py.File(self.cached_fname, 'a')
            self.data = pd.read_hdf(self.cached_fname, 'data')
        except KeyError:
            # the 'data' key is missing from the cache, so rebuild it
            self.data = self._build_dataframe()
            self.data.to_hdf(self.cached_fname, 'data')
        print(self.h5store)
    else:
        self.data = self._build_dataframe()
    return
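The try/except above is a load-or-build caching idiom that several snippets on this page repeat. A minimal standalone sketch, assuming a build() callable that returns a DataFrame (load_or_build, cache_path, and build are illustrative names, not from the original project):

import pandas as pd

def load_or_build(cache_path, key, build):
    # Return the DataFrame cached under `key`, rebuilding on a cache miss.
    try:
        return pd.read_hdf(cache_path, key)
    except (KeyError, FileNotFoundError):
        # Either the key is absent from an existing store,
        # or the cache file does not exist yet.
        df = build()
        df.to_hdf(cache_path, key)
        return df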
baseline.py (project: DREAM_invivo_tf_binding_prediction_challenge_baseline, author: nboley)
def load_or_build_motif_scores(self, fasta_fname):
    try:
        self.motif_scores = pd.read_hdf(self.cached_fname, 'motif_scores')
        self.motif_scores.index = self.data.index
    except KeyError:
        self.motif_scores = self.build_motif_scores(fasta_fname)
        self.motif_scores.to_hdf(self.cached_fname, 'motif_scores')
    return self.motif_scores
baseline.py (project: DREAM_invivo_tf_binding_prediction_challenge_baseline, author: nboley)
def load_or_build_dnase_fc_scores(self):
    try:
        self.dnase_fc_scores = pd.read_hdf(self.cached_fname, 'dnase_scores')
    except KeyError:
        # the cache file exists but lacks a 'dnase_scores' key
        self.dnase_fc_scores = self.build_dnase_fc_scores()
        self.dnase_fc_scores.to_hdf(self.cached_fname, 'dnase_scores')
    except IOError:
        # the cache file itself is missing or unreadable
        self.dnase_fc_scores = self.build_dnase_fc_scores()
    return self.dnase_fc_scores
def __init__(self, path):
    # With no key given, read_hdf succeeds only if the store holds a single object.
    self._panel = pd.read_hdf(path)
def main(batch_size=10000):
    posts_df = pd.read_hdf('nw_posts.hdf5', 'posts')
    index_posts_in_elastic(posts_df, batch_size=batch_size)
def load_wv_pandas(fname):
    return pd.read_hdf(fname, 'data')
def get_availableExchanges():
    SymbolsDF = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'Symbols')
    return SymbolsDF.EXCHANGE.drop_duplicates().values
def get_availableSymbols(SymbolFilter=None):
    SymbolsDF = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'Symbols')
    # Default to NYSE when no filter (or no 'Exchange' entry) is supplied.
    if SymbolFilter is None or 'Exchange' not in SymbolFilter:
        exchange = 'NYSE'
    else:
        exchange = SymbolFilter['Exchange']
    DFNew = SymbolsDF.loc[lambda DF: DF.EXCHANGE == exchange, :]
    # Keep plain one- to four-letter ticker symbols only.
    return DFNew.loc[DFNew.SYMBOL.str.match('[A-Z]{1,4}$'), :].SYMBOL.values
def get_availableSymbols(SymbolFilter=None):
    DF = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'OANDA')
    return DF.instrument.values
def main():
    DF = pd.read_hdf('/home/lc1bfrbl/Database/Oanda.hdf', 'WTICO_USD_H1')
    TTT = CalcTaylorCycle(DF)
    # Restrict the plots to June 2017.
    Index = (TTT.index.year == 2017) & (TTT.index.month == 6)
    TTT[Index].MO.plot()
    TTT[Index].MLo.plot()
    TTT[Index].MHi.plot()
    TTT[Index].High.plot()
    TTT[Index].Low.plot()
def save_table(self, code, date):
    TR_REQ_TIME_INTERVAL = 4
    time.sleep(TR_REQ_TIME_INTERVAL)
    data_81 = self.wrapper.get_data_opt10081(code, date)
    time.sleep(TR_REQ_TIME_INTERVAL)
    data_86 = self.wrapper.get_data_opt10086(code, date)
    # The Korean column names were lost in transcoding; the '???' entries
    # below are kept verbatim from the source.
    col_86 = ['???', '???', '??(??)', '???', '??', '??', '????', '???', '????',
              '???', '????', '????', '????', '?????', '?????', '?????', '?????']
    data = pd.concat([data_81, data_86.loc[:, col_86]], axis=1)
    #con = sqlite3.connect("../data/stock.db")
    try:
        data = data.loc[data.index > int(self.kiwoom.start_date.strftime("%Y%m%d"))]
        #orig_data = pd.read_sql("SELECT * FROM '%s'" % code, con, index_col='??').sort_index()
        orig_data = pd.read_hdf("../data/hdf/%s.hdf" % code, 'day').sort_index()
        end_date = orig_data.index[-1]
        orig_data = orig_data.loc[orig_data.index < end_date]
        data = data.loc[data.index >= end_date]
        data = pd.concat([orig_data, data], axis=0)
    except (FileNotFoundError, IndexError) as e:
        print(e)
    finally:
        data.index.name = '??'
        if len(data) != 0:
            #data.to_sql(code, con, if_exists='replace')
            data.to_hdf('../data/hdf/%s.hdf' % code, 'day', mode='w')
def read_h5():
    code_list = glob.glob('../data/stock/*.h5')
    for code in code_list[:10]:
        data = pd.read_hdf(code, 'table').sort_index()
        # Keep only the first half of 2016 (string-formatted YYYYMMDD index).
        data = data.loc[data.index >= str(20160101)]
        data = data.loc[data.index <= str(20160630)]
        print(data.head())
def superReadFile(filepath, **kwargs):
    """
    One function to read almost all types of data files.

    Uses pandas.read_excel on Excel files and returns a DataFrame of the
    first sheet (unless a sheet is specified in kwargs).
    Uses superReadText on .txt, .tsv, or .csv files and returns a DataFrame
    of the data.
    """
    if isinstance(filepath, pd.DataFrame):
        return filepath
    ext = os.path.splitext(filepath)[1].lower()
    if ext in ['.xlsx', '.xls']:
        kwargs.pop('dtype', None)
        return pd.read_excel(filepath, **kwargs)
    elif ext in ['.txt', '.tsv', '.csv']:
        return superReadText(filepath, **kwargs)
    elif ext in ['.gz', '.bz2', '.zip', '.xz']:
        return superReadCSV(filepath, **kwargs)
    elif ext == '.h5':
        return pd.read_hdf(filepath)
    else:
        raise NotImplementedError("Unable to read '{}' files".format(ext))
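A quick usage sketch; the file names are hypothetical, and superReadText / superReadCSV are helpers defined elsewhere in the same module:

# Dispatch is by file extension.
df_xl = superReadFile('report.xlsx', sheet_name=0)  # hypothetical Excel file
df_csv = superReadFile('data.csv')                  # routed to superReadText
df_h5 = superReadFile('cache.h5')                   # store must hold a single object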
test_pytables.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_conv_read_write(self):
    path = create_tempfile(self.path)
    try:
        def roundtrip(key, obj, **kwargs):
            obj.to_hdf(path, key, **kwargs)
            return read_hdf(path, key)

        o = tm.makeTimeSeries()
        assert_series_equal(o, roundtrip('series', o))

        o = tm.makeStringSeries()
        assert_series_equal(o, roundtrip('string_series', o))

        o = tm.makeDataFrame()
        assert_frame_equal(o, roundtrip('frame', o))

        o = tm.makePanel()
        assert_panel_equal(o, roundtrip('panel', o))

        # table
        df = DataFrame(dict(A=lrange(5), B=lrange(5)))
        df.to_hdf(path, 'table', append=True)
        result = read_hdf(path, 'table', where=['index>2'])
        assert_frame_equal(df[df.index > 2], result)
    finally:
        safe_remove(path)
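The where= clause in the last assertion only works for stores written in table format. A minimal standalone sketch (the file name is illustrative):

import pandas as pd

df = pd.DataFrame({'A': range(5), 'B': range(5)})
df.to_hdf('demo.h5', key='df', format='table')  # table format is queryable
subset = pd.read_hdf('demo.h5', 'df', where='index > 2')
print(subset)  # rows with index 3 and 4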
test_pytables.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_round_trip_equals(self):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})
    with ensure_clean_path(self.path) as path:
        df.to_hdf(path, 'df', format='table')
        other = read_hdf(path, 'df')
        tm.assert_frame_equal(df, other)
        self.assertTrue(df.equals(other))
        self.assertTrue(other.equals(df))
test_pytables.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_to_hdf_with_object_column_names(self):
    # GH9057
    # Writing HDF5 table format should only work for string-like
    # column types
    types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
                         tm.makeDateIndex, tm.makeTimedeltaIndex,
                         tm.makePeriodIndex]
    types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]
    if compat.PY3:
        types_should_run.append(tm.makeUnicodeIndex)
    else:
        types_should_fail.append(tm.makeUnicodeIndex)

    for index in types_should_fail:
        df = DataFrame(np.random.randn(10, 2), columns=index(2))
        with ensure_clean_path(self.path) as path:
            with self.assertRaises(
                    ValueError, msg=("cannot have non-object label "
                                     "DataIndexableCol")):
                df.to_hdf(path, 'df', format='table', data_columns=True)

    for index in types_should_run:
        df = DataFrame(np.random.randn(10, 2), columns=index(2))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = pd.read_hdf(
                path, 'df', where="index = [{0}]".format(df.index[0]))
            assert(len(result))
test_pytables.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_read_hdf_errors(self):
    df = DataFrame(np.random.rand(4, 5),
                   index=list('abcd'),
                   columns=list('ABCDE'))

    with ensure_clean_path(self.path) as path:
        # reading a key from a file that does not exist yet
        self.assertRaises(IOError, read_hdf, path, 'key')

        df.to_hdf(path, 'df')
        store = HDFStore(path, mode='r')
        store.close()
        # read_hdf refuses a store that has already been closed
        self.assertRaises(IOError, read_hdf, store, 'df')

        with open(path, mode='r') as store:
            # plain file handles are not supported
            self.assertRaises(NotImplementedError, read_hdf, store, 'df')
test_pytables.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_read_nokey(self):
    df = DataFrame(np.random.rand(4, 5),
                   index=list('abcd'),
                   columns=list('ABCDE'))

    with ensure_clean_path(self.path) as path:
        df.to_hdf(path, 'df', mode='a')
        reread = read_hdf(path)
        assert_frame_equal(df, reread)

        # with two keys in the store, read_hdf can no longer guess
        df.to_hdf(path, 'df2', mode='a')
        self.assertRaises(ValueError, read_hdf, path)
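The same no-key behavior outside the test harness, as a minimal sketch (the file name is illustrative):

import pandas as pd

df = pd.DataFrame({'x': [1, 2, 3]})
df.to_hdf('single.h5', key='only')
print(pd.read_hdf('single.h5'))  # no key needed: the store holds one object

df.to_hdf('single.h5', key='second', mode='a')
try:
    pd.read_hdf('single.h5')  # ambiguous now that there are two keys
except ValueError as err:
    print(err)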