def getStationMetadata():
    '''
    Retrieve metadata on groundwater wells

    @return pandas dataframe with groundwater well information
    '''
    data_file = DataFetcher.getDataLocation('groundwater')
    if data_file is None:
        print('Dataset not available')
        return None

    store = pd.HDFStore(data_file, 'r')
    meta_data = store['meta_data']
    store.close()

    return meta_data
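A minimal usage sketch (not part of the snippet above); it assumes the groundwater store has already been cached so that getDataLocation() returns a valid path:

# Hypothetical usage; returns None (and prints a message) if the groundwater
# dataset has not been fetched yet.
meta = getStationMetadata()
if meta is not None:
    print(meta.head())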
def getAntennaLogs():
    '''
    Retrieve information about antenna changes

    @return dictionary of antenna changes
    '''
    store_location = data_util.getDataLocation('ngl_gps')
    store = pd.HDFStore(store_location, 'r')
    logs_df = store['ngl_steps']
    store.close()

    metadata = DataFetcher.getStationMetadata()

    logs_dict = OrderedDict()
    for station in metadata.index:
        offset_dates = logs_df[logs_df['Station'] == station].index.unique()
        offset_dates = pd.Series(offset_dates)
        logs_dict[station] = offset_dates

    return logs_dict
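A small consumption sketch (hypothetical, not from the source) showing the shape of the returned mapping, station name to a pandas Series of offset dates:

# Hypothetical usage; assumes the 'ngl_gps' store has been cached locally.
logs = getAntennaLogs()
for station, dates in list(logs.items())[:3]:
    print(station, len(dates), 'antenna changes')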
def __init__(self, db_h5_file,
             future_time=20, lookback=100, read_first_k_table=-1, normalize=True, two_class=False):
    super(DBGeneticReader, self).__init__()
    self._db = pd.HDFStore(db_h5_file)
    self._future_time = future_time
    self._lookback = lookback
    self._db_len = 0
    self._two_class = two_class
    self._tables = []

    for k in self._db:
        self._db_len += len(self._db[k]) - future_time - lookback
        t = self._db[k].iloc[:, 4:].astype('float32')
        t['AveragePrice'] = (t['AskPrice1'] + t['BidPrice1']) / 2
        if normalize:
            t = (t - t.mean()) / (t.std() + 1e-10)
        self._tables.append(t)
        if read_first_k_table != -1 and len(self._tables) == read_first_k_table:
            break
def _read_hdf_from_buffer(self, buffer):
    with pandas.HDFStore(
            "data.h5",
            mode="r",
            driver="H5FD_CORE",
            driver_core_backing_store=0,
            driver_core_image=buffer.read()) as store:

        if len(store.keys()) > 1:
            raise Exception('Ambiguous matrix store. More than one dataframe in the hdf file.')

        try:
            return store["matrix"]
        except KeyError:
            print("The hdf file should contain one and only one key, 'matrix'.")
            return store[store.keys()[0]]
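A hypothetical way to obtain such a buffer (names and paths are assumptions, not from the source): read a serialized HDF5 matrix from disk into an in-memory file object and hand it to the method.

import io

# Hypothetical setup: 'matrix.h5' holds a single dataframe under the 'matrix' key,
# and matrix_store is an instance of the class that defines _read_hdf_from_buffer().
with open("matrix.h5", "rb") as f:
    buffer = io.BytesIO(f.read())
df = matrix_store._read_hdf_from_buffer(buffer)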
def fake_metta(matrix_dict, metadata):
    """Stores matrix and metadata in a metta-data-like form

    Args:
        matrix_dict (dict) of form { columns: values }.
            Expects an entity_id to be present which it will use as the index
        metadata (dict). Any metadata that should be set

    Yields:
        tuple of filenames for matrix and metadata
    """
    matrix = pandas.DataFrame.from_dict(matrix_dict).set_index('entity_id')
    with tempfile.NamedTemporaryFile() as matrix_file:
        with tempfile.NamedTemporaryFile('w') as metadata_file:
            hdf = pandas.HDFStore(matrix_file.name)
            hdf.put('title', matrix, data_columns=True)
            matrix_file.seek(0)
            yaml.dump(metadata, metadata_file)
            metadata_file.seek(0)
            yield (matrix_file.name, metadata_file.name)
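Since the function yields, it is presumably consumed as a generator or wrapped as a context manager in its test suite; a usage sketch under that assumption (all inputs are hypothetical):

from contextlib import contextmanager

# Assumption: fake_metta is consumed as a context manager in the tests.
with contextmanager(fake_metta)({'entity_id': [1, 2], 'feature': [0.5, 0.7]},
                                {'end_time': '2016-01-01'}) as (matrix_path, metadata_path):
    print(matrix_path, metadata_path)  # the temporary files exist only inside this block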
# kagglegym.py -- from the Two-Sigma-Financial-Modeling-Challenge project (author: xiaofeiwen)
def __init__(self):
    with pd.HDFStore("../input/train.h5", "r") as hfdata:
        self.timestamp = 0
        fullset = hfdata.get("train")
        self.unique_timestamp = fullset["timestamp"].unique()
        # Get a list of unique timestamps:
        # use the first half for training and
        # the second half for the test set
        n = len(self.unique_timestamp)
        i = int(n / 2)
        timesplit = self.unique_timestamp[i]
        self.n = n
        self.unique_idx = i
        self.train = fullset[fullset.timestamp < timesplit]
        self.test = fullset[fullset.timestamp >= timesplit]

        # Needed to compute final score
        self.full = self.test.loc[:, ['timestamp', 'y']]
        self.full['y_hat'] = 0.0
        self.temp_test_y = None
def hdfone_filenames(folder: str, path_to: str) -> List[str]:
    filenames = []
    try:
        assert isinstance(folder, str), "folder isn't string: %s" % folder
        assert isinstance(path_to, str), "path_to isn't string: %s" % path_to

        if settings.DATA_TYPE == "hdfone":
            f = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(f):
                with HDFStore(f) as hdf:
                    filenames = [f for f in hdf.keys() if folder in f]
                    hdf.close()  # redundant: the context manager closes the store anyway
        else:
            filenames = multi_filenames(path_to_history=path_to)
    except Exception as err:
        print(colored.red("hdfone_filenames: {}".format(err)))

    return filenames
def write(self, frames):
    """
    Write the frames to the target HDF5 file, using the format used by
    ``pd.Panel.to_hdf``.

    Parameters
    ----------
    frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
        An iterable or other mapping of sid to the corresponding OHLCV
        pricing data.
    """
    with HDFStore(self._path, 'w',
                  complevel=self._complevel,
                  complib=self._complib) as store:
        panel = pd.Panel.from_dict(dict(frames))
        panel.to_hdf(store, 'updates')

    with tables.open_file(self._path, mode='r+') as h5file:
        h5file.set_node_attr('/', 'version', 0)
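Note that pd.Panel was removed in pandas 1.0, so the snippet above only runs on older pandas. A rough modern sketch of the same idea (single 'updates' key plus a root-level 'version' attribute), using a concatenated MultiIndex frame instead of a Panel; everything beyond those two details is an assumption:

import pandas as pd
import tables

def write_updates(path, frames, complevel=5, complib='zlib'):
    # Stack the per-sid OHLCV frames into one frame keyed by sid
    # (assumption: this replaces the Panel layout used above).
    combined = pd.concat(dict(frames))
    with pd.HDFStore(path, 'w', complevel=complevel, complib=complib) as store:
        store.put('updates', combined)
    # Preserve the root-level 'version' attribute written by the original code.
    with tables.open_file(path, mode='r+') as h5file:
        h5file.set_node_attr('/', 'version', 0)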
def add_data(self, packet: DataPacket):
    """
    Import a DataFrame into the project
    :param packet: DataPacket custom class containing file path, dataframe, data type and flight association
    :return: Void
    """
    self.log.debug("Ingesting data and exporting to hdf5 store")
    file_uid = 'f' + uuid.uuid4().hex[1:]  # Fixes NaturalNameWarning by ensuring first char is a letter ('f').
    with HDFStore(str(self.hdf_path)) as store:
        # Separate data into groups by data type (GPS & Gravity Data).
        # format: 'table' pytables format enables searching/appending, 'fixed' is more performant.
        store.put('{}/{}'.format(packet.data_type, file_uid), packet.data, format='fixed', data_columns=True)

    # Store a reference to the original file path
    self.data_map[file_uid] = packet.path
    try:
        flight = self.flights[packet.flight.uid]
        if packet.data_type == 'gravity':
            flight.gravity = file_uid
        elif packet.data_type == 'gps':
            flight.gps = file_uid
    except KeyError:
        return False
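A hypothetical read-back of an ingested frame, assuming the same '<data_type>/<file_uid>' key layout used by store.put() above ('project' and 'file_uid' are stand-in names):

# Hypothetical: retrieve a previously ingested gravity frame by its file_uid.
with HDFStore(str(project.hdf_path), mode='r') as store:
    gravity_df = store.get('gravity/{}'.format(file_uid))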
def save_cache_file(data, cache_file, **kwargs):
    """Save minitree dataframe + cut history to a cache file

    Any kwargs will be passed to pandas HDFStore. Defaults are:
        complib='blosc'
        complevel=9
    """
    kwargs.setdefault('complib', 'blosc')
    kwargs.setdefault('complevel', 9)

    dirname = os.path.dirname(cache_file)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)

    store = pd.HDFStore(cache_file, **kwargs)
    store.put('data', data)

    # Store the cuts history for the data
    store.get_storer('data').attrs.cut_history = cuts._get_history(data)
    store.close()
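A minimal counterpart sketch for reading the cache back (assumed, not from the source), relying only on the 'data' key and the cut_history attribute written above:

def load_cache_file(cache_file):
    # Hypothetical loader mirroring save_cache_file().
    with pd.HDFStore(cache_file, mode='r') as store:
        data = store.get('data')
        cut_history = store.get_storer('data').attrs.cut_history
    return data, cut_history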
def __init__(self):
    with pd.HDFStore("../input/train.h5", "r") as hfdata:
        self.timestamp = 0
        fullset = hfdata.get("train")
        self.unique_timestamp = fullset["timestamp"].unique()
        # Get a list of unique timestamps:
        # use the first half for training and
        # the second half for the test set
        n = len(self.unique_timestamp)
        i = int(n / 2)
        timesplit = self.unique_timestamp[i]
        self.n = n
        self.unique_idx = i
        self.train = fullset[fullset.timestamp < timesplit]
        self.test = fullset[fullset.timestamp >= timesplit]

        self.y_test_full = self.test['y']  # Just in case the full labels are needed later
        self.y_pred_full = []
        self.temp_test_y = None

        self.ID_COL_NAME = 'id'
        self.SAMPLE_COL_NAME = 'sample'
        self.TARGET_COL_NAME = 'y'
        self.TIME_COL_NAME = 'timestamp'
def _init_frame_node_parm_with_data(self):
    """
    Initialize the parameters that need to be calculated from the input data
    :return: None
    """
    try:
        store = pd.HDFStore(self.input_paths[0])
        chunk = store.select('table1',
                             start=0,
                             stop=100)
        for col_name in self.encode_col:
            if self.encode_len.get(col_name) is None:
                if chunk[col_name].dtype in ['int', 'float']:
                    self.encode_len[col_name] = 1
                    self.input_size = self.input_size + 1
                else:
                    self.encode_len[col_name] = self.word_vector_size
                    self.input_size = self.input_size + self.word_vector_size
                    self.encode_onehot[col_name] = OneHotEncoder(self.word_vector_size)
                self.encode_dtype[col_name] = str(chunk[col_name].dtype)
    except Exception as e:
        raise Exception("error on wcnn feed parm prepare : {0}".format(e))
def _frame_parser(self, file_path, index):
    """
    Parse frame (tabular) data
    :return: flattened input vectors
    """
    try:
        store = pd.HDFStore(file_path)
        chunk = store.select('table1',
                             start=index.start,
                             stop=index.stop)
        input_vector = []
        count = index.stop - index.start
        for col_name in self.encode_col:
            if chunk[col_name].dtype == 'O':
                input_vector.append(list(map(lambda x: self.encode_onehot[col_name].get_vector(x),
                                             chunk[col_name][0:count].tolist())))
            else:
                input_vector.append(np.array(list(map(lambda x: [self._filter_nan(x)],
                                                      chunk[col_name][0:count].tolist()))))
        return self._flat_data(input_vector, len(chunk[col_name][0:count].tolist()))
    except Exception as e:
        raise Exception(e)
    finally:
        store.close()
def _nlp_parser(self, file_path, index):
    """
    Parse nlp (text) data
    :return: word-embedded encode sequences
    """
    try:
        store = pd.HDFStore(file_path)
        chunk = store.select('table1',
                             start=index.start,
                             stop=index.stop)
        count = index.stop - index.start
        if self.encode_col in chunk:
            encode = self.encode_pad(self._preprocess(chunk[self.encode_col].values)[0:count],
                                     max_len=self.encode_len)
            return self._word_embed_data(self.embed_type, encode)
        else:
            warnings.warn("requested encode column does not exist in the chunk")
            return [['#'] * self.encode_len]
    except Exception as e:
        raise Exception(e)
    finally:
        store.close()
def _convert_data_format(self, file_path, index):
    """
    Read the requested rows and concatenate the preprocessed columns into a single sequence
    :param file_path: path of the HDF5 store to read
    :param index: slice-like object with start/stop row positions
    :return: list containing one flattened sequence
    """
    try:
        return_data = []
        store = pd.HDFStore(file_path)
        chunk = store.select('table1',
                             start=index.start,
                             stop=index.stop)
        for column in self.column_list:
            for line in self._preprocess(chunk[column].values)[index.start:index.stop]:
                return_data = return_data + line
        return [return_data]
    except Exception as e:
        raise Exception(e)
    finally:
        store.close()
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)

    # create some extrinsics
    grbm = model.Model([vis_layer, hid_layer])

    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()

        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = model.Model.from_saved(store)
        store.close()

    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = model_utils.State.from_visible(vis_data, grbm)
    dropout_scale = model_utils.State.dropout_rescale(grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state, dropout_scale).units[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state, dropout_scale).units[0]

    assert be.allclose(vis_orig, vis_reload)
def save(self, store: pandas.HDFStore) -> None:
    """
    Save a model to an open HDFStore.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore)

    Returns:
        None
    """
    # save the config as an attribute
    config = self.get_config()
    store.put('model', pandas.DataFrame())
    store.get_storer('model').attrs.config = config

    # save the parameters
    for i in range(self.num_weights):
        key = os.path.join('weights', 'weights' + str(i))
        self.weights[i].save_params(store, key)
    for i in range(self.num_layers):
        key = os.path.join('layers', 'layers' + str(i))
        self.layers[i].save_params(store, key)
def save_params(self, store, key):
    """
    Save the parameters to a HDFStore.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore): the writeable stream for the params.
        key (str): the path for the layer params.

    Returns:
        None
    """
    for i, ip in enumerate(self.params):
        df_params = pandas.DataFrame(be.to_numpy_array(ip))
        store.put(os.path.join(key, 'parameters', 'key' + str(i)), df_params)
def load_params(self, store, key):
    """
    Load the parameters from an HDFStore.

    Notes:
        Performs an IO operation.

    Args:
        store (pandas.HDFStore): the readable stream for the params.
        key (str): the path for the layer params.

    Returns:
        None
    """
    params = []
    for i, ip in enumerate(self.params):
        # DataFrame.as_matrix() was removed in pandas 1.0; .to_numpy() is the modern equivalent.
        params.append(be.float_tensor(
            store.get(os.path.join(key, 'parameters', 'key' + str(i))).as_matrix()
        ).squeeze())  # collapse trivial dimensions to a vector
    self.params = self.params.__class__(*params)
def get_trajs_mat(self, cols, traj):
    if traj == 'avg_probe_pp':
        with pd.HDFStore(self.pp_traj_df_path, mode='r') as store:
            df_traj = store.select('pp_traj_df', columns=cols)
            trajs_mat = df_traj.values.transpose()
    elif traj == 'avg_probe_ba':
        with pd.HDFStore(self.ba_traj_df_path, mode='r') as store:
            df_traj = store.select('ba_traj_df', columns=cols)
            trajs_mat = df_traj.values.transpose()
    elif 'cat_task' in traj:
        with pd.HDFStore(self.cat_task_traj_df_path, mode='r') as store:
            columns = [traj.replace('cat_task_', '') + '_fold{}'.format(i) for i in cols]
            df_traj = store.select('cat_task_traj_df', columns=columns)
            trajs_mat = df_traj.values.transpose()
    elif 'syn_task' in traj:
        with pd.HDFStore(self.syn_task_traj_df_path, mode='r') as store:
            columns = [traj.replace('syn_task_', '') + '_fold{}'.format(i) for i in cols]
            df_traj = store.select('syn_task_traj_df', columns=columns)
            trajs_mat = df_traj.values.transpose()
    else:
        raise AttributeError('rnnlab: Invalid argument passed to "traj".')
    return trajs_mat
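Worth noting (the writer side is not shown in the source): HDFStore.select() with a columns= argument only works on frames written in 'table' format, so the trajectory frames are presumably stored along these lines:

# Hypothetical writer for one of the trajectory frames; the path and frame are
# stand-ins, but format='table' is what makes the column selection above possible.
with pd.HDFStore('pp_traj.h5', mode='w') as store:
    store.put('pp_traj_df', pp_traj_df, format='table')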
def __iter__(self, gen_type='train', batch_size=None, shuffle_block=False, random_sample=False,
             split_fields=False, on_disk=True, squeeze_output=False, **kwargs):
    gen_type = gen_type.lower()

    if on_disk:
        print('on disk...')
        for hdf_X, hdf_y in self._files_iter_(gen_type=gen_type, shuffle_block=shuffle_block):
            # num_of_lines = pd.HDFStore(hdf_y, mode='r').get_storer('fixed').shape[0]
            X_all = pd.read_hdf(hdf_X, mode='r').as_matrix()
            y_all = pd.read_hdf(hdf_y, mode='r').as_matrix()
            gen = self.generator(X_all, y_all, batch_size, shuffle=random_sample)
            for X, y in gen:
                if split_fields:
                    X = np.split(X, self.max_length, axis=1)
                    for i in range(self.max_length):
                        X[i] -= self.feat_min[i]
                if squeeze_output:
                    y = y.squeeze()
                yield X, y
    else:
        print('not implemented')
def read_offsets(offsets_file):
    offsets = np.zeros(
        shape=(8, 2, 4096, 40),
        dtype='f4')

    def name_to_channel_gain_id(name):
        _, channel, gain = name.split('_')
        channel = int(channel)
        gain_id = {'high': 0, 'low': 1}[gain]
        return channel, gain_id

    with pd.HDFStore(offsets_file) as st:
        for name in st.keys():
            channel, gain_id = name_to_channel_gain_id(name)
            df = st[name]
            df.sort_values(["cell", "sample"], inplace=True)
            offsets[channel, gain_id] = df["median"].values.reshape(-1, 40)

    return offsets
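For context (inferred from the reader above, not shown in the source), the store is expected to hold one frame per key of the form '<prefix>_<channel>_<gain>' with 'cell', 'sample' and 'median' columns; a hypothetical writer would look like:

# Hypothetical writer matching the layout read_offsets() expects for channel 0,
# high gain; offsets_df is a stand-in frame with 'cell', 'sample' and 'median'
# columns covering 4096 cells x 40 samples.
with pd.HDFStore('offsets.h5', mode='w') as st:
    st.put('offsets_0_high', offsets_df)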
def cacheData(self, data_specification):
    '''
    Cache Kepler data locally

    @param data_specification: List of kepler IDs
    '''
    kid_list = data_specification

    data_location = DataFetcher.getDataLocation('kepler')
    if data_location is None:
        data_location = os.path.join(os.path.expanduser('~'), '.skdaccess', 'kepler')
        os.makedirs(data_location, exist_ok=True)
        data_location = os.path.join(data_location, 'kepler_data.h5')
        DataFetcher.setDataLocation('kepler', data_location)

    store = pd.HDFStore(data_location)

    missing_kid_list = []
    for kid in kid_list:
        if 'kid_' + kid not in store:
            missing_kid_list.append(kid)

    if len(missing_kid_list) > 0:
        print("Downloading data for " + str(len(missing_kid_list)) + " star(s)")
        missing_kid_data = self.downloadKeplerData(missing_kid_list)
        for kid, data in missing_kid_data.items():
            store.put('kid_' + kid, data)

    store.close()
def output(self):
    '''
    Output kepler data wrapper

    @return DataWrapper
    '''
    kid_list = self.ap_paramList[0]()
    kid_list = [str(kid).zfill(9) for kid in kid_list]

    self.cacheData(kid_list)

    data_location = DataFetcher.getDataLocation('kepler')

    kid_data = dict()
    store = pd.HDFStore(data_location)
    for kid in kid_list:
        kid_data[kid] = store['kid_' + kid]
        # If downloaded using old skdaccess version, switch index
        if kid_data[kid].index.name == 'TIME':
            kid_data[kid]['TIME'] = kid_data[kid].index
            kid_data[kid].set_index('CADENCENO', inplace=True)
    store.close()

    kid_data = OrderedDict(sorted(kid_data.items(), key=lambda t: t[0]))

    # If a list of quarters is specified, only select data in those quarters
    if self.quarter_list is not None:
        for kid in kid_list:
            kid_data[kid] = kid_data[kid][kid_data[kid]['QUARTER'].isin(self.quarter_list)]

    return TableWrapper(kid_data, default_columns=['PDCSAP_FLUX'], default_error_columns=['PDCSAP_FLUX_ERR'])