python类read_hdf()的实例源码

utils.py 文件源码 项目:triage 作者: dssg 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def get_matrix_and_metadata(matrix_path, metadata_path):
    """Retrieve a matrix in hdf format and
    metadata about the matrix in yaml format

    Args:
        matrix_path: path to an HDF5 file readable by pandas.read_hdf
        metadata_path: path to a YAML file describing the matrix

    Returns: (tuple) matrix, metadata
    """
    matrix = pandas.read_hdf(matrix_path)
    with open(metadata_path) as f:
        # safe_load: calling yaml.load without an explicit Loader is
        # deprecated in PyYAML >= 5.1 and can execute arbitrary Python
        # embedded in the file; metadata needs only plain YAML types.
        metadata = yaml.safe_load(f)
    return matrix, metadata
sample_gen.py 文件源码 项目:keras-molecules 作者: maxhodak 项目源码 文件源码 阅读 28 收藏 0 点赞 0 评论 0
def read_smiles_data(filename):
    """Return the 'structure' column (SMILES strings) from an HDF5 file.

    Args:
        filename: path to an HDF5 file containing a frame under key 'table'
                  with a 'structure' column.

    Returns: the 'structure' column as a pandas Series.
    """
    import pandas as pd
    h5f = pd.read_hdf(filename, 'table')
    # Removed dead commented-out gzip reading path that predated the
    # HDF5 format; [:] returns the full column.
    return h5f['structure'][:]
baseline.py 文件源码 项目:DREAM_invivo_tf_binding_prediction_challenge_baseline 作者: nboley 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
    def __init__(self,
                 labels_fname,
                 regions_fname=None,
                 max_n_rows=None,
                 load_cached=True):
        """Initialize the labels data set, loading from an HDF5 cache if possible.

        labels_fname  -- path to the labels file; the factor name is taken
                         from its basename (text before the first '.').
        regions_fname -- optional regions file path (stored only, here).
        max_n_rows    -- optional row limit (stored only, here).
        load_cached   -- when True, try self.cached_fname before rebuilding.
        """
        self.labels_fname = labels_fname
        self.regions_fname = regions_fname
        self.max_n_rows = max_n_rows
        self._hash = None  # placeholder; presumably computed lazily elsewhere
        self.load_cached = load_cached
        # extract the sample names from the header
        #assert labels_fname.endswith("labels.tsv.gz"), \
        #    "Unrecognized labels filename '%s'" % labels_fname
        self._init_header_data(labels_fname)
        # extract the factor from the filename
        self.factor = os.path.basename(labels_fname).split('.')[0]

        # if we want to use a cached version...
        if self.load_cached is True:
            try:
                print "Loading '%s'" % self.cached_fname
                # NOTE(review): h5py.File is opened without an explicit mode;
                # the default may create or lock the file — confirm intent.
                self.h5store = h5py.File(self.cached_fname)
                self.data = pd.read_hdf(self.cached_fname, 'data')
            # KeyError: cache file exists but holds no 'data' key yet,
            # so build the frame and persist it for next time.
            except KeyError:
                self.data = self._build_dataframe()
                self.data.to_hdf(self.cached_fname, 'data')
                print self.h5store
        else:
            self.data = self._build_dataframe()

        return
baseline.py 文件源码 项目:DREAM_invivo_tf_binding_prediction_challenge_baseline 作者: nboley 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def load_or_build_motif_scores(self, fasta_fname):
        try:
            self.motif_scores = pd.read_hdf(self.cached_fname, 'motif_scores')
            self.motif_scores.index = self.data.index
        except KeyError:
            self.motif_scores = self.build_motif_scores(fasta_fname)
            self.motif_scores.to_hdf(self.cached_fname, 'motif_scores')
        return self.motif_scores
baseline.py 文件源码 项目:DREAM_invivo_tf_binding_prediction_challenge_baseline 作者: nboley 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def load_or_build_dnase_fc_scores(self):
        try:
            self.dnase_fc_scores = pd.read_hdf(self.cached_fname, 'dnase_scores')
        except KeyError:
            self.dnase_fc_scores = self.build_dnase_fc_scores()
            self.dnase_fc_scores.to_hdf(self.cached_fname, 'dnase_scores')
        except IOError:
            self.dnase_fc_scores = self.build_dnase_fc_scores()            
        return self.dnase_fc_scores
minute_bars.py 文件源码 项目:catalyst 作者: enigmampc 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def __init__(self, path):
        self._panel = pd.read_hdf(path)
hdf5_to_elastic.py 文件源码 项目:netwars 作者: i008 项目源码 文件源码 阅读 22 收藏 0 点赞 0 评论 0
def main(batch_size=10000):
    """Load the posts frame from 'nw_posts.hdf5' and index it into Elasticsearch.

    batch_size -- number of documents per bulk-indexing batch.
    """
    frame = pd.read_hdf('nw_posts.hdf5', 'posts')
    index_posts_in_elastic(frame, batch_size=batch_size)
utils.py 文件源码 项目:Tensorflow-Softmax-NER-RNNLM 作者: queue-han 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def load_wv_pandas(fname):
    """Read the word-vector frame stored under key 'data' in HDF5 file *fname*."""
    frame = pd.read_hdf(fname, 'data')
    return frame
IQData.py 文件源码 项目:PythonTrading 作者: F2011B 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def get_availableExchanges():
    """Return the unique exchange codes listed in the Symbols HDF5 store."""
    symbols = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'Symbols')
    unique_exchanges = symbols.EXCHANGE.drop_duplicates()
    return unique_exchanges.values
IQData.py 文件源码 项目:PythonTrading 作者: F2011B 项目源码 文件源码 阅读 32 收藏 0 点赞 0 评论 0
def get_availableSymbols(SymbolFilter=None):
    """Return ticker symbols from the Symbols HDF5 store.

    SymbolFilter -- optional dict; its 'Exchange' entry selects the
                    exchange to filter on. When the filter (or the key)
                    is absent, NYSE is used.

    Returns: numpy array of symbols consisting of 1-4 capital letters.
    """
    SymbolsDF = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'Symbols')

    # Fixes: 'SymbolFilter == None' -> 'is None', and the filtering logic
    # was duplicated verbatim in three branches — collapse to one path.
    if SymbolFilter is None or 'Exchange' not in SymbolFilter:
        exchange = 'NYSE'
    else:
        exchange = SymbolFilter['Exchange']

    DFNew = SymbolsDF.loc[SymbolsDF.EXCHANGE == exchange, :]
    # Keep only plain 1-4 capital-letter tickers.
    return DFNew.loc[DFNew.SYMBOL.str.match('[A-Z]{1,4}$'), :].SYMBOL.values
Oanda.py 文件源码 项目:PythonTrading 作者: F2011B 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def get_availableSymbols(SymbolFilter=None):
    """Return all OANDA instrument names from the Symbols HDF5 store.

    SymbolFilter is accepted for signature parity with the other data
    providers but is not used here.
    """
    oanda_frame = pd.read_hdf(Constants.InputFolder + 'Symbols.hdf', 'OANDA')
    return oanda_frame.instrument.values
TaylorCycle.py 文件源码 项目:PythonTrading 作者: F2011B 项目源码 文件源码 阅读 26 收藏 0 点赞 0 评论 0
def main():
    """Plot the Taylor-cycle level series for WTI crude H1 bars over June 2017."""
    bars = pd.read_hdf('/home/lc1bfrbl/Database/Oanda.hdf', 'WTICO_USD_H1')
    cycle = CalcTaylorCycle(bars)
    june_2017 = (cycle.index.year == 2017) & (cycle.index.month == 6)
    # Same five series, plotted in the same order as before.
    for column in ('MO', 'MLo', 'MHi', 'High', 'Low'):
        cycle[june_2017][column].plot()
save_data.py 文件源码 项目:PyTrader 作者: didw 项目源码 文件源码 阅读 25 收藏 0 点赞 0 评论 0
def save_table(self, code, date):
        TR_REQ_TIME_INTERVAL = 4
        time.sleep(TR_REQ_TIME_INTERVAL)
        data_81 = self.wrapper.get_data_opt10081(code, date)
        time.sleep(TR_REQ_TIME_INTERVAL)
        data_86 = self.wrapper.get_data_opt10086(code, date)
        col_86 = ['???', '???', '??(??)', '???', '??', '??', '????', '???', '????',
                  '???', '????', '????', '????', '?????', '?????', '?????', '?????']
        data = pd.concat([data_81, data_86.loc[:, col_86]], axis=1)
        #con = sqlite3.connect("../data/stock.db")
        try:
            data = data.loc[data.index > int(self.kiwoom.start_date.strftime("%Y%m%d"))]
            #orig_data = pd.read_sql("SELECT * FROM '%s'" % code, con, index_col='??').sort_index()
            orig_data = pd.read_hdf("../data/hdf/%s.hdf" % code, 'day').sort_index()
            end_date = orig_data.index[-1]
            orig_data = orig_data.loc[orig_data.index < end_date]
            data = data.loc[data.index >= end_date]
            data = pd.concat([orig_data, data], axis=0)
        except (FileNotFoundError, IndexError) as e:
            print(e)
            pass
        finally:
            data.index.name = '??'
            if len(data) != 0:
                #data.to_sql(code, con, if_exists='replace')
                data.to_hdf('../data/hdf/%s.hdf'%code, 'day', mode='w')
test.py 文件源码 项目:PyTrader 作者: didw 项目源码 文件源码 阅读 27 收藏 0 点赞 0 评论 0
def read_h5():
    """Print the head of each of the first ten stock HDF5 files,
    restricted to rows dated in the first half of 2016."""
    for path in glob.glob('../data/stock/*.h5')[:10]:
        frame = pd.read_hdf(path, 'table').sort_index()
        in_range = (frame.index >= str(20160101)) & (frame.index <= str(20160630))
        frame = frame.loc[in_range]
        print(frame.head())
pandatools.py 文件源码 项目:zeex 作者: zbarge 项目源码 文件源码 阅读 31 收藏 0 点赞 0 评论 0
def superReadFile(filepath, **kwargs):
    """
    Read almost any tabular data file into a DataFrame.

    Dispatches on the file extension:
      - .xlsx/.xls       -> pandas.read_excel (first sheet unless specified in kwargs)
      - .txt/.tsv/.csv   -> superReadText
      - .gz/.bz2/.zip/.xz -> superReadCSV (compressed text)
      - .h5              -> pandas.read_hdf
    A DataFrame passed in is returned unchanged.

    Raises NotImplementedError for unrecognized extensions.
    """
    if isinstance(filepath, pd.DataFrame):
        return filepath

    ext = os.path.splitext(filepath)[1].lower()

    if ext in ['.xlsx', '.xls']:
        # read_excel does not accept the 'dtype' kwarg used by the text readers.
        kwargs.pop('dtype', None)
        return pd.read_excel(filepath, **kwargs)

    elif ext in ['.txt', '.tsv', '.csv']:
        return superReadText(filepath, **kwargs)

    # BUG FIX: 'xz' was missing its leading dot, so '.xz' files fell
    # through to the NotImplementedError branch.
    elif ext in ['.gz', '.bz2', '.zip', '.xz']:
        return superReadCSV(filepath, **kwargs)

    elif ext in ['.h5']:
        return pd.read_hdf(filepath)

    else:
        raise NotImplementedError("Unable to read '{}' files".format(ext))
test_pytables.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 37 收藏 0 点赞 0 评论 0
def test_conv_read_write(self):
        path = create_tempfile(self.path)
        try:
            def roundtrip(key, obj, **kwargs):
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            o = tm.makePanel()
            assert_panel_equal(o, roundtrip('panel', o))

            # table
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(path, 'table', append=True)
            result = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)

        finally:
            safe_remove(path)
test_pytables.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 33 收藏 0 点赞 0 评论 0
def test_round_trip_equals(self):
        # GH 9330
        df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            other = read_hdf(path, 'df')
            tm.assert_frame_equal(df, other)
            self.assertTrue(df.equals(other))
            self.assertTrue(other.equals(df))
test_pytables.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 35 收藏 0 点赞 0 评论 0
def test_to_hdf_with_object_column_names(self):
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types

        types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
                             tm.makeDateIndex, tm.makeTimedeltaIndex,
                             tm.makePeriodIndex]
        types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]

        if compat.PY3:
            types_should_run.append(tm.makeUnicodeIndex)
        else:
            types_should_fail.append(tm.makeUnicodeIndex)

        for index in types_should_fail:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with self.assertRaises(
                        ValueError, msg=("cannot have non-object label "
                                         "DataIndexableCol")):
                    df.to_hdf(path, 'df', format='table', data_columns=True)

        for index in types_should_run:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', format='table', data_columns=True)
                result = pd.read_hdf(
                    path, 'df', where="index = [{0}]".format(df.index[0]))
                assert(len(result))
test_pytables.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 23 收藏 0 点赞 0 评论 0
def test_read_hdf_errors(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            self.assertRaises(IOError, read_hdf, path, 'key')
            df.to_hdf(path, 'df')
            store = HDFStore(path, mode='r')
            store.close()
            self.assertRaises(IOError, read_hdf, store, 'df')
            with open(path, mode='r') as store:
                self.assertRaises(NotImplementedError, read_hdf, store, 'df')
test_pytables.py 文件源码 项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda 作者: SignalMedia 项目源码 文件源码 阅读 29 收藏 0 点赞 0 评论 0
def test_read_nokey(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a')
            reread = read_hdf(path)
            assert_frame_equal(df, reread)
            df.to_hdf(path, 'df2', mode='a')
            self.assertRaises(ValueError, read_hdf, path)


问题


面经


文章

微信
公众号

扫码关注公众号