Example source code for Python load()

DataIO.py (project: kaggle, author: RankingAI)
def LoadFromHdfFile(InputDir, mode='train'):

    if mode == 'train':
        data = pd.read_hdf(path_or_buf='%s/train.hdf' % InputDir, key='train')
    elif mode == 'valid':
        data = pd.read_hdf(path_or_buf='%s/valid.hdf' % InputDir, key='valid')
    else:
        data = pd.read_hdf(path_or_buf='%s/test.hdf' % InputDir, key='test')

    return data
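A minimal usage sketch, assuming pandas is imported as pd elsewhere in DataIO.py and that the input directory already holds the matching HDF files; the directory and the tiny DataFrame below are hypothetical, included only to make the call runnable.

import pandas as pd

# Hypothetical location; LoadFromHdfFile only needs the matching *.hdf file inside it.
InputDir = '.'

# Write a tiny split first so the example is self-contained (this writer is an
# assumption, not part of the kaggle project).
pd.DataFrame({'x': [1, 2, 3]}).to_hdf('%s/train.hdf' % InputDir, key='train')

train = LoadFromHdfFile(InputDir, mode='train')
print(train.shape)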

## class method, load data with pkl format
DataIO.py (project: kaggle, author: RankingAI)
def LoadFromPklFile(InputDir):

    # the with-blocks close each file automatically, so no explicit close() is needed
    with open('%s/train.pkl' % InputDir, 'rb') as i_file:
        TrainData = pickle.load(i_file)

    with open('%s/test.pkl' % InputDir, 'rb') as i_file:
        TestData = pickle.load(i_file)

    return TrainData, TestData
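For completeness, a hedged sketch of the save side that would produce the two pickle files this loader expects; the project's actual writer is not shown in this listing, so the name SaveToPklFile and the protocol choice are assumptions.

import pickle

def SaveToPklFile(TrainData, TestData, OutputDir):
    # Assumed counterpart to LoadFromPklFile: write train.pkl and test.pkl under OutputDir
    with open('%s/train.pkl' % OutputDir, 'wb') as o_file:
        pickle.dump(TrainData, o_file, protocol=pickle.HIGHEST_PROTOCOL)
    with open('%s/test.pkl' % OutputDir, 'wb') as o_file:
        pickle.dump(TestData, o_file, protocol=pickle.HIGHEST_PROTOCOL)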

connector.py (project: catalearn, author: Catalearn)
def download_and_unzip_result(url, job_hash):
    r = requests.get(url, stream=True)
    status_check(r)
    total_size = int(r.headers.get('content-length', 0))
    with open('download.zip', 'wb') as f:
        pbar = tqdm(total=total_size, unit='B', unit_scale=True)

        chunk_size = 1024 * 32  # 32kb
        for data in r.iter_content(chunk_size):
            f.write(data)
            # update by the number of bytes actually received so the bar
            # does not overshoot on the final, shorter chunk
            pbar.update(len(data))
        pbar.close()

    with open("download.zip", "rb") as zip_file:
        zip_content = zip_file.read()
    z = ZipFile(io.BytesIO(zip_content))
    z.extractall()
    remove('download.zip')

    result = None  # output of the script
    new_files = None  # names of new files created by the script

    pickle_path = path.abspath(path.join(job_hash, job_hash + '.pkl'))
    if path.isfile(pickle_path):
        with open(pickle_path, 'rb') as f:
            # Hack: a workaround for dill's pickling problem
            # import_all()
            result = dill.load(f)
            # unimport_all()
        remove(pickle_path)

    if path.isdir(job_hash):
        new_files = listdir(job_hash)
        for name in new_files:
            rename(path.join(job_hash, name), name)
        rmtree(job_hash)

    return result, new_files
simple.py (project: combine-DT-with-NN-in-RL, author: Burning-Bear)
def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)
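A hedged usage sketch for this deepq-style loader. It assumes the surrounding module (build_act, U, ActWrapper) is importable; the environment name and checkpoint path below are hypothetical, and the old-style gym reset/step signatures match the era of this code.

import gym
import numpy as np

env = gym.make("CartPole-v0")        # hypothetical environment
act = load("cartpole_model.pkl")     # hypothetical checkpoint written by the matching save()

obs, done = env.reset(), False
while not done:
    # ActWrapper is callable; obs[None] adds the batch dimension
    action = act(np.array(obs)[None])[0]
    obs, reward, done, info = env.step(action)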
user_logging_tests.py (project: auto_ml, author: doordash)
def test_verify_features_does_not_work_by_default():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)
    try:
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        # no separate Keras deep-learning file was written for this pipeline
        pass


    with warnings.catch_warnings(record=True) as w:

        results = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)

        print('Here are the caught warnings:')
        print(w)

        assert len(w) == 1

        assert results is None
user_logging_tests.py (project: auto_ml, author: doordash)
def test_verify_features_finds_no_missing_features_when_none_are_missing():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
        , 'sex': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train, verify_features=True)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)

    missing_features = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)
    print('missing_features')
    print(missing_features)

    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))
    assert len(missing_features['prediction_not_training']) == 0
    assert len(missing_features['training_not_prediction']) == 0
utils_models.py (project: auto_ml, author: doordash)
def load_ml_model(file_name):

    with open(file_name, 'rb') as read_file:
        base_pipeline = dill.load(read_file)

    if isinstance(base_pipeline, utils_categorical_ensembling.CategoricalEnsembler):
        for step in base_pipeline.transformation_pipeline.named_steps:
            pipeline_step = base_pipeline.transformation_pipeline.named_steps[step]

            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass

        for step in base_pipeline.trained_models:
            pipeline_step = base_pipeline.trained_models[step]

            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass

    else:

        for step in base_pipeline.named_steps:
            pipeline_step = base_pipeline.named_steps[step]
            try:
                if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
                    pipeline_step.model = insert_deep_learning_model(pipeline_step, file_name)
            except AttributeError:
                pass

    return base_pipeline
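A hedged usage sketch; the file name below is hypothetical and assumes a pipeline previously written out by auto_ml's save() step, as in the tests above.

# Hypothetical path to a previously saved pipeline
trained_model = load_ml_model('auto_ml_saved_pipeline.dill')

# The returned pipeline can be used for prediction again
predictions = trained_model.predict(df_titanic_test)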

# Keeping this here for legacy support
simple.py (project: rl-attack-detection, author: yenchenlin)
def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)
core.py (project: BMSpy, author: romeopatrick11)
def Load(file):
    """Load a model from the specified file path."""
    with open(file, 'rb') as f:
        model = dill.load(f)
    return model
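A hedged sketch of the matching save side; BMSpy's actual writer is not part of this listing, so the function name and the use of dill.dump are assumptions that simply mirror Load above.

import dill

def Save(model, file):
    """Assumed counterpart to Load: serialize the model to the given path with dill."""
    with open(file, 'wb') as f:
        dill.dump(model, f)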
main.py (project: hakkuframework, author: 4shadoww)
def update_session(fname=None):
    import dill as pickle
    if fname is None:
        fname = conf.session
    try:
        s = pickle.load(gzip.open(fname, "rb"))
    except IOError:
        # fall back to an uncompressed session file
        s = pickle.load(open(fname, "rb"))
    scapy_session = builtins.__dict__["scapy_session"]
    scapy_session.update(s)


################
##### Main #####
################
utils.py (project: hakkuframework, author: 4shadoww)
def load_object(fname):
    import dill as pickle
    return pickle.load(gzip.open(fname, "rb"))
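A hedged sketch of the matching writer, mirroring this loader's gzip-compressed pickle; the name save_object follows the obvious convention but is an assumption in this listing.

import gzip
import dill as pickle

def save_object(fname, obj):
    # Assumed counterpart to load_object: gzip-compressed pickle on disk
    with gzip.open(fname, "wb") as f:
        pickle.dump(obj, f)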
io.py (project: marseille, author: vene)
def load_csr(f, return_y=False):
    npz = np.load(f)
    X = csr_matrix((npz['data'], npz['indices'], npz['indptr']),
                   shape=npz['shape'])

    if return_y:
        return X, npz['y']
    else:
        return X
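A hedged sketch of the writer that would produce the .npz layout load_csr reads back (data, indices, indptr, shape, plus an optional y); the project's actual save routine is not shown here, so treat this as an assumption.

import numpy as np
from scipy.sparse import csr_matrix

def save_csr(f, X, y=None):
    # Store the CSR components under the key names that load_csr expects
    X = csr_matrix(X)
    arrays = dict(data=X.data, indices=X.indices, indptr=X.indptr, shape=X.shape)
    if y is not None:
        arrays['y'] = y
    np.savez(f, **arrays)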
io.py (project: marseille, author: vene)
def load_results(key, args):
    fn = cache_fname(key, args)
    with open(fn, "rb") as f:
        return dill.load(f)
exp_baseline_linear.py (project: marseille, author: vene)
def saga_score_struct_cache(*args):

    arghash = sha1(repr(("score_struct",) + args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_score_struct(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out
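cache_fname is referenced by several of these snippets but not included in the listing; a hedged reconstruction, following the same sha1-over-repr naming pattern used in saga_score_struct_cache above, might look like this.

from hashlib import sha1

def cache_fname(key, args):
    # Assumed helper: hash the call arguments into a stable cache file name under res/
    arghash = sha1(repr((key,) + tuple(args)).encode('utf-8')).hexdigest()
    return "res/{}_{}.dill".format(key, arghash)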
exp_linear.py (project: marseille, author: vene)
def linear_cv_score(dataset, alpha, l1_ratio, constraints):

    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio,
                                         constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)

        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)

        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)
    return scores
exp_svmstruct.py (project: marseille, author: vene)
def svmstruct_cv_score(dataset, C, class_weight, constraints,
                       compat_features, second_order_features):

    fn = cache_fname("svmstruct_cv_score", (dataset, C, class_weight,
                                            constraints, compat_features,
                                            second_order_features))

    if os.path.exists(fn):
        logging.info("Cached file already exists.")
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")

    n_folds = 5 if dataset == 'ukp' else 3

    # second-order feature groups: grandparents only for the UKP corpus,
    # siblings only for CDCP, coparents whenever second-order features are enabled
    grandparents = second_order_features and dataset == 'ukp'
    coparents = second_order_features
    siblings = second_order_features and dataset == 'cdcp'

    scores = []
    all_Y_pred = []

    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        train_docs = list(load(ids[tr]))
        val_docs = list(load(ids[val]))

        clf, Y_val, Y_pred = fit_predict(train_docs, val_docs, dataset, C,
                                         class_weight,
                                         constraints, compat_features,
                                         second_order_features, grandparents,
                                         coparents, siblings)
        all_Y_pred.extend(Y_pred)
        scores.append(clf.model._score(Y_val, Y_pred))

    with open(fn, "wb") as f:
        dill.dump((scores, all_Y_pred), f)

    return scores, all_Y_pred
computeengine.py (project: loman, author: janusassetallocation)
def read_dill(file_):
    """
    Deserialize a computation from a file or file-like object

    :param file_: If a string, the path of a file to read from; otherwise a file-like object
    :type file_: File-like object, or string
    """
    if isinstance(file_, six.string_types):
        with open(file_, 'rb') as f:
            return dill.load(f)
    else:
        return dill.load(file_)
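loman also has a write-side counterpart; its body is not included in this listing, so the sketch below is an assumption built to mirror read_dill.

import dill
import six

def write_dill(computation, file_):
    """Serialize a computation to a file path or file-like object (assumed counterpart to read_dill)."""
    if isinstance(file_, six.string_types):
        with open(file_, 'wb') as f:
            dill.dump(computation, f)
    else:
        dill.dump(computation, file_)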
__init__.py (project: all2vec, author: iheartradio)
def load(self, pkl, filepath):
    entity_type = pkl.get_entity_type(self._entity_type_id)
    self.__dict__ = entity_type.__dict__
    # initialize index
    self._ann_obj = AnnoyIndex(pkl.get_nfactor(), entity_type._metric)
    # mmap the file
    self._ann_obj.load(filepath)
__init__.py (project: all2vec, author: iheartradio)
def load_entities(self, entities, file_getter):
    """Load underlying entities."""
    for k in entities:
        annoy_filepath = file_getter.get_file_path('{}.ann'.format(k))
        try:
            self._annoy_objects[k].load(self, annoy_filepath)
        except IOError as e:
            raise IOError(
                "Error: cannot load file {0}, which was built "
                "with the model. '{1}'".format(annoy_filepath, e)
            )

