Example source code for Python's load()

classify.py (project: ISM2017, author: ybayle)
def test_models(models_dir, test_dir, out_dir):
    models_dir = utils.abs_path_dir(models_dir) + "/"
    test_dir = utils.abs_path_dir(test_dir) + "/"
    utils.create_dir(out_dir)
    test_files = os.listdir(test_dir)
    models = os.listdir(models_dir)
    for model in models:
        utils.print_success(model)
        pred_dir = out_dir + model + "/"
        utils.create_dir(pred_dir)
        clf = joblib.load(models_dir + model + "/" + model + ".pkl")
        for index, test_file in enumerate(test_files):
            print(str(index) + "\t" + test_file)
            sys.stdout.write("\033[F")
            sys.stdout.write("\033[K")
            test_features = read_test_file(test_dir + test_file)
            predictions = clf.predict_proba(test_features)
            with open(pred_dir + test_file, "w") as filep:
                for pred in predictions:
                    filep.write(str(pred[0]) + "\n")
        sys.stdout.write("\033[K")
data_stream_recorder.py (project: autolab_core, author: BerkeleyAutomation)
def _caches_to_file(cache_path, start, end, name, cb, concat):
    start_time = time()
    if concat:
        all_data = []
        for i in range(start, end):
            data = load(os.path.join(cache_path, "{0}.jb".format(i)))
            all_data.extend(data)
        dump(all_data, name, 3)
    else:
        target_path = os.path.join(cache_path, name[:-3])
        if not os.path.exists(target_path):
            os.makedirs(target_path)
        for i in range(start, end):
            src_file_path = os.path.join(cache_path, "{0}.jb".format(i))

            basename = os.path.basename(src_file_path)
            target_file_path = os.path.join(target_path, basename)
            shutil.move(src_file_path, target_file_path)

        finished_flag = os.path.join(target_path, '.finished')
        with open(finished_flag, 'a'):
            os.utime(finished_flag, None)

    logging.debug("Finished saving data to {0}. Took {1}s".format(name, time()-start_time))
    cb()
utils.py (project: Learning-to-navigate-without-a-map, author: ToniRV)
def create_ds_from_splits(mat_file_prefix, db, splits):
    """Create datasets from splits of data.

    This function exists because larger datasets are stored as multiple splits.
    """
    for batch_idx in range(1, splits + 1):
        # load data
        temp_data = load_mat_data(mat_file_prefix+str(batch_idx)+".mat")
        print ("[MESSAGE] Loaded %d-th split" % batch_idx)

        group_name = "grid_data_split_"+str(batch_idx)

        # save data within splits
        for key in data_dict:
            add_h5_group(group_name, db)
            add_h5_ds(temp_data[key], key, db, group_name)
            print ("[MESSAGE] %s: Saved %s" % (group_name, key))
plt_results1D.py (project: snn4hrl, author: florensacc)
def plot_all_policy_at0(path_experiment,color,num_iter=100,fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment,'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
        # print "sampled action in iter {}: {}. Reward should be: {}".format(itr, action_at_0[0], reward(action_at_0[0]))
    itr = list(range(num_iter))
    plt.plot(itr,mean_at_0, color=color, label = 'mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label = 'logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir,'policy_progress'))
    else:
        print("No directory for saving plots")

## estimate by MC the policy at 0!
plt_results2D.py (project: snn4hrl, author: florensacc)
def plot_all_policy_at0(path_experiment, color, num_iter=100, fig_dir=None):
    mean_at_0 = []
    var_at_0 = []
    for itr in range(num_iter):
        data_bimodal_1d = joblib.load(os.path.join(path_experiment, 'itr_{}.pkl'.format(itr)))
        poli = data_bimodal_1d['policy']
        action_at_0 = poli.get_action(np.array((0,)))
        mean_at_0.append(action_at_0[1]['mean'])
        var_at_0.append(action_at_0[1]['log_std'])
    itr = list(range(num_iter))
    plt.plot(itr, mean_at_0, color=color, label='mean at 0')
    plt.plot(itr, var_at_0, color=color * 0.7, label='logstd at 0')
    plt.title('How the policy varies across iterations')
    plt.xlabel('iteration')
    plt.ylabel('mean and variance at 0')
    plt.legend(loc=3)
    if fig_dir:
        plt.savefig(os.path.join(fig_dir, 'policy_at_0'))
    else:
        print("No directory for saving plots")


## plot for all the experiments
train.py (project: korean_restaurant_reservation, author: JudeLee19)
def __init__(self):

        et = EntityTracker()
        self.bow_enc = BoW_encoder()
        self.emb = UtteranceEmbed()
        at = ActionTracker(et)

        self.dataset, dialog_indices = Data(et, at).trainset

        train_indices = joblib.load('data/train_test_list/train_indices_759')
        test_indices = joblib.load('data/train_test_list/test_indices_759_949')

        self.dialog_indices_tr = train_indices
        self.dialog_indices_dev = test_indices

        obs_size = self.emb.dim + self.bow_enc.vocab_size + et.num_features
        self.action_templates = at.get_action_templates()
        action_size = at.action_size
        nb_hidden = 128

        self.net = LSTM_net(obs_size=obs_size,
                       action_size=action_size,
                       nb_hidden=nb_hidden)
entityRecognition.py (project: scikitcrf_NER, author: ManikandanThangavelu)
def train(filePath):
    try:
        if not filePath.lower().endswith('json'):
            return {'success':False,'message':'Training file should be in json format'}
        with open(filePath) as file:
            ent_data = json.load(file)
        dataset = [jsonToCrf(q, nlp) for q in ent_data['entity_examples']]
        X_train = [sent2features(s) for s in dataset]
        y_train = [sent2labels(s) for s in dataset]
        crf = sklearn_crfsuite.CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_transitions=True
        )
        crf.fit(X_train, y_train)
        if(not os.path.exists("crfModel")):
            os.mkdir("crfModel")
        if(os.path.isfile("crfModel/classifier.pkl")):
            os.remove("crfModel/classifier.pkl")
        joblib.dump(crf,"crfModel/classifier.pkl")
        return {'success':True,'message':'Model Trained Successfully'}
    except Exception as ex:
        return {'success':False,'message':'Error while Training the model - '+str(ex)}
entityRecognition.py (project: scikitcrf_NER, author: ManikandanThangavelu)
def predict(utterance):
    try:
        tagged = []
        finallist = []
        parsed = nlp(utterance)
        for i in range(len(parsed)):
            tagged.append((str(parsed[i]),parsed[i].tag_))
        finallist.append(tagged)
        test = [sent2features(s) for s in finallist]
        if(os.path.isfile("crfModel/classifier.pkl")):
            crf = joblib.load("crfModel/classifier.pkl")
        else:
            return {'success':False,'message':'Please Train the model first'}
        predicted = crf.predict(test)
        entityList = extractEntities(predicted[0],tagged)
        return {'success':True,'entitiesPredicted':entityList}
    except Exception as ex:
        return {'success':False,'message':'Error while predicting - '+str(ex)}
agent.py (project: deer, author: VinF)
def setNetwork(self, fname, nEpoch=-1):
        """ Set values into the network

        Parameters
        -----------
        fname : string
            Name of the file where the values are
        nEpoch : int
            Epoch number (Optional)
        """

        basename = "nnets/" + fname

        if (nEpoch>=0):
            all_params = joblib.load(basename + ".epoch={}".format(nEpoch))
        else:
            all_params = joblib.load(basename)

        self._network.setAllParams(all_params)
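
An illustrative counterpart (not taken from the deer source) would write the parameters with joblib.dump using the same naming convention that setNetwork() expects when it reloads them; dump_network below is a hypothetical helper.

import joblib

def dump_network(all_params, fname, nEpoch=-1):
    """Sketch of a save helper mirroring setNetwork()'s file naming."""
    basename = "nnets/" + fname
    if nEpoch >= 0:
        joblib.dump(all_params, basename + ".epoch={}".format(nEpoch))
    else:
        joblib.dump(all_params, basename)
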
document_scores.py (project: word2vec_pipeline, author: NIHOPA)
def __init__(self, *args, **kwargs):
        super(score_simple, self).__init__(*args, **kwargs)

        f_db = os.path.join(
            kwargs['output_data_directory'],
            kwargs['term_frequency']['f_db']
        )
        if not os.path.exists(f_db):
            msg = "{} not computed yet, needed for TF methods!"
            raise ValueError(msg.format(f_db))

        score_config = simple_config.load()["score"]
        f_csv = os.path.join(
            score_config["output_data_directory"],
            score_config["term_document_frequency"]["f_db"],
        )
        IDF = pd.read_csv(f_csv)
        IDF = dict(zip(IDF["word"].values, IDF["count"].values))
        self.corpus_N = IDF.pop("__pipeline_document_counter")

        # Compute the IDF
        for key in IDF:
            IDF[key] = np.log(float(self.corpus_N) / (IDF[key] + 1))
        self.IDF = IDF
ddpg.py (project: rllabplusplus, author: shaneshixiang)
def restore(self, checkpoint_dir=None):
        if checkpoint_dir is None: checkpoint_dir = logger.get_snapshot_dir()
        checkpoint_file = os.path.join(checkpoint_dir, 'params.chk')
        if os.path.isfile(checkpoint_file + '.meta'):
            sess = tf.get_default_session()
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_file)

            tabular_chk_file = os.path.join(checkpoint_dir, 'progress.csv.chk')
            if os.path.isfile(tabular_chk_file):
                tabular_file = os.path.join(checkpoint_dir, 'progress.csv')
                logger.remove_tabular_output(tabular_file)
                shutil.copy(tabular_chk_file, tabular_file)
                logger.add_tabular_output(tabular_file)

            pool_file = os.path.join(checkpoint_dir, 'pool.chk')
            if self.save_format == 'pickle':
                pickle_load(pool_file)
            elif self.save_format == 'joblib':
                self.pool = joblib.load(pool_file)
            else: raise NotImplementedError

            logger.log('Restored from checkpoint %s'%checkpoint_file)
        else:
            logger.log('No checkpoint %s'%checkpoint_file)
train.py (project: CIKM2017, author: heliarmk)
def read_data_from_pkl(datafile):
    """
    read file in joblib.dump pkl
    :param datafile: filename of pkl
    :return: 
    """
    datas = joblib.load(datafile)
    for i in range(10):
        datas = np.random.permutation(datas)
    inputs, labels = [], []
    for data in datas:
        inputs.append(data["input"])
        labels.append(data["label"])

    inputs = np.array(inputs).reshape(-1, 15, 101, 101, 3).astype(np.float32)
    inputs -= np.mean(inputs, axis=(2, 3), keepdims=True)
    inputs /= np.std(inputs, axis=(2, 3), keepdims=True)
    labels = np.array(labels).reshape(-1, 1).astype(np.float32)

    return inputs, labels
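
A minimal sketch of producing a pickle that read_data_from_pkl() can consume, assuming each record is a dict with "input" and "label" keys and the 15x101x101x3 shape implied by the reshape above; the file name and random data are placeholders.

import joblib
import numpy as np

records = [{"input": np.random.rand(15, 101, 101, 3).astype(np.float32),
            "label": float(i % 2)}
           for i in range(4)]
joblib.dump(records, "radar_sample.pkl")

inputs, labels = read_data_from_pkl("radar_sample.pkl")
print(inputs.shape, labels.shape)  # (4, 15, 101, 101, 3) (4, 1)
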
senti_models.py (project: senti, author: stevenxxiu)
def _fit_embedding_word(self, embedding_type, construct_docs, tokenize_, d=None):
        if embedding_type == 'google':
            embeddings_ = joblib.load('data/google/GoogleNews-vectors-negative300.pickle')
            embeddings_ = SimpleNamespace(X=embeddings_.syn0, vocab={w: v.index for w, v in embeddings_.vocab.items()})
        elif embedding_type == 'twitter':
            estimator = Pipeline([
                ('tokenize', MapCorporas(tokenize_)),
                ('word2vec', MergeSliceCorporas(CachedFitTransform(Word2Vec(
                    sg=1, size=d, window=10, hs=0, negative=5, sample=1e-3, min_count=1, iter=20, workers=16
                ), self.memory))),
            ]).fit([self.train_docs, self.unsup_docs[:10**6], self.val_docs, self.test_docs])
            embeddings_ = estimator.named_steps['word2vec'].estimator
            embeddings_ = SimpleNamespace(X=embeddings_.syn0, vocab={w: v.index for w, v in embeddings_.vocab.items()})
        else:
            embeddings_ = SimpleNamespace(X=np.empty((0, d)), vocab={})
        estimator = Pipeline([
            ('tokenize', MapCorporas(tokenize_)),
            # 0.25 is chosen so the unknown vectors have approximately the same variance as google pre-trained ones
            ('embeddings', MapCorporas(Embeddings(
                embeddings_, rand=lambda shape: get_rng().uniform(-0.25, 0.25, shape).astype('float32'),
                include_zero=True
            ))),
        ])
        estimator.fit(construct_docs)
        return estimator.named_steps['embeddings'].estimator
cnn_embed.py (project: dstc6_dialogue_breakdown_task, author: JudeLee19)
def __init__(self, num_rnn_hidden, num_classes, utter_embed,
                 sequence_length, filter_sizes, num_filters, config, l2_reg_lambda=0.0):

        self.num_hidden = num_rnn_hidden
        self.num_classes = num_classes
        self.utter_embed = utter_embed
        self.logger = config.logger

        self.sequence_length = sequence_length
        self.embedding_size = utter_embed.get_vector_size()
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.l2_reg_lambda = l2_reg_lambda

        self.config = config

        self.cate_mapping_dict = joblib.load('./dbdc3/data/cate_mapping_dict')
utils.py (project: open-database, author: mitaffinity)
def read(self, unpickler):
        """Reconstruct the array."""
        filename = os.path.join(unpickler._dirname, self.filename)
        # Load the array from the disk
        # use getattr instead of self.allow_mmap to ensure backward compat
        # with NDArrayWrapper instances pickled with joblib < 0.9.0
        allow_mmap = getattr(self, 'allow_mmap', True)
        memmap_kwargs = ({} if not allow_mmap
                         else {'mmap_mode': unpickler.mmap_mode})
        array = unpickler.np.load(filename, **memmap_kwargs)
        # Reconstruct subclasses. This does not work with old
        # versions of numpy
        if (hasattr(array, '__array_prepare__') and
            self.subclass not in (unpickler.np.ndarray,
                                  unpickler.np.memmap)):
            # We need to reconstruct another subclass
            new_array = unpickler.np.core.multiarray._reconstruct(
                self.subclass, (0,), 'b')
            return new_array.__array_prepare__(array)
        else:
            return array
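
The public-API behaviour this wrapper supports can be exercised directly with joblib: passing mmap_mode='r' to joblib.load returns a read-only numpy.memmap instead of materialising the whole array. The path below is a placeholder.

import joblib
import numpy as np

arr = np.arange(10**6, dtype=np.float64)
joblib.dump(arr, "/tmp/arr.jb")

mm = joblib.load("/tmp/arr.jb", mmap_mode="r")
print(type(mm), mm[:5])  # numpy.memmap backed by the file on disk
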
data_helpers.py (project: dnn_page_vectors, author: ankit-cliqz)
def get_vocab_index_embedding_weights(self, embedding_dim, embedding_weights_masking, load_embeddings_pickled=False, load_vocab_pickled=False):
        embedding_weights = []
        if embedding_weights_masking:
            masking_value = "masked"      # suffix used in the pickled vocabulary file names
        else:
            masking_value = "non_masked"

        # Load data from files
        if load_vocab_pickled:
            vocab_index_dict = joblib.load(self.conf.vocab_index_file.format(masking_value))
            vocab_set = joblib.load(self.conf.vocab_set_file.format(masking_value))

        else:
            vocab_set, vocab_index_dict = self.generate_vocabulary_set(masking=embedding_weights_masking)


        if self.conf.feature_level == "word":
            embedding_weights = self.load_word_embeddings_compact(embedding_dim, vocab_set,
                                                                           masking=embedding_weights_masking,
                                                                           use_pickled=load_embeddings_pickled)
        return embedding_weights, vocab_index_dict
fixtures.py (project: cesium_web, author: cesium-ml)
def add_file(prediction, create, value, *args, **kwargs):
        train_featureset = prediction.model.featureset
        fset_data, data = featurize.load_featureset(train_featureset.file_uri)
        if 'class' in prediction.dataset.name or 'regr' in prediction.dataset.name:
            labels = data['labels']
        else:
            labels = []
        model_data = joblib.load(prediction.model.file_uri)
        if hasattr(model_data, 'best_estimator_'):
            model_data = model_data.best_estimator_
        preds = model_data.predict(fset_data)
        pred_probs = (pd.DataFrame(model_data.predict_proba(fset_data),
                                   index=fset_data.index.astype(str),
                                   columns=model_data.classes_)
                      if hasattr(model_data, 'predict_proba') else [])
        all_classes = model_data.classes_ if hasattr(model_data, 'classes_') else []
        pred_path = pjoin(TMP_DIR, '{}.npz'.format(str(uuid.uuid4())))
        featurize.save_featureset(fset_data, pred_path, labels=labels,
                                  preds=preds, pred_probs=pred_probs)
        prediction.file_uri = pred_path
        DBSession().commit()
run_model.py (project: Mmodel, author: gxrtbf)
def train_model_group(model_info,function='logistic'):

    model = joblib.load(getTheFile('models/{}.m').format(function))
    base_info = model_config.model_dim
    user_id = model_info['user_id']
    del model_info['user_id']

    for key,value in base_info.items():
        if key not in model_info.columns:
            model_info[key] = value

    #model_info = transform_data.transform_with_woe(model_info)
    model_info = model_info[sorted(model_info.columns)]
    x = np.matrix(model_info)
    yt = model.predict(x)
    ytp = model.predict_proba(x)
    return user_id,yt,ytp
utils.py (project: vin-keras, author: neka-nat)
def process_map_data(path):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']
    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])

    num = im_data.shape[0]
    num_train = num - num // 5  # integer division so the slices below stay valid

    im_train = np.concatenate((np.expand_dims(im_data[:num_train], 1),
                               np.expand_dims(value_data[:num_train], 1)),axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate((np.expand_dims(im_data[num_train:], 1),
                              np.expand_dims(value_data[num_train:], 1)),axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), (im_test, state_test, label_test)
previous_runs.py (project: guacml, author: guacml)
def load_previous_runs(self):
        if not os.path.isfile(self.prev_runs_file_):
            self.found_matching_run = False
            return

        with open(self.prev_runs_file_, 'r') as file:
            self.all_prev_runs_ = yaml.load(file)
            for run in self.all_prev_runs_:
                self.max_data_version_ = max(self.max_data_version_, run['input_data_version'])
                if run['input_data_hash'] == self.data_.df_hash:
                    self.max_config_version_ = max(self.max_config_version_, run['config_version'])
                    if run['config_hash'] == self.config_hash_:
                        if self.found_matching_run:
                            raise Exception('Duplicate previous run entries found.')
                        self.found_matching_run = True
                        self.run_ = run
                        self.data_folder_ = self.get_versioned_folder(run['input_data_version'],
                                                                      run['config_version'])

        if self.found_matching_run is None:
            self.found_matching_run = False
model.py (project: ProtScan, author: gianlucacorrado)
def load(self, obj):
        """Load model from file."""
        self.__dict__.update(joblib.load(obj).__dict__)
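
A plausible counterpart (not shown on this page) is a save method that dumps the whole object, so that load() can later copy its attributes back; this is only a sketch of that pattern.

def save(self, fname):
    """Sketch of a matching save: persist the model object with joblib."""
    joblib.dump(self, fname)
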
utils.py (project: Learning-to-navigate-without-a-map, author: ToniRV)
def load_grid_selection(path):
    """Load a selected grid from pickle."""
    if not os.path.isfile(path):
        raise ValueError("The file does not exist.")

    with open(path, "rb") as f:
        data = pickle.load(f)

    return data['environment'], data['gt'], data['po'], data['goal']
utils.py (project: Learning-to-navigate-without-a-map, author: ToniRV)
def process_map_data(path, return_full=False):
    data = joblib.load(path)

    im_data = data['im']
    value_data = data['value']
    state_data = data['state']

    if return_full:
        im_full = np.concatenate((np.expand_dims(im_data, 1),
                                  np.expand_dims(value_data, 1)),
                                 axis=1).astype(dtype=np.uint8)
        return im_full, state_data, data['label'], data['sample_idx']

    label_data = np.array([np.eye(1, 8, l)[0] for l in data['label']])
    num = im_data.shape[0]
    num_train = num - num // 5  # integer division so the slices below stay valid

    im_train = np.concatenate((np.expand_dims(im_data[:num_train], 1),
                               np.expand_dims(value_data[:num_train], 1)),
                              axis=1).astype(dtype=np.float32)
    state_train = state_data[:num_train]
    label_train = label_data[:num_train]

    im_test = np.concatenate((np.expand_dims(im_data[num_train:], 1),
                              np.expand_dims(value_data[num_train:], 1)),
                             axis=1).astype(dtype=np.float32)
    state_test = state_data[num_train:]
    label_test = label_data[num_train:]

    return (im_train, state_train, label_train), \
           (im_test, state_test, label_test), data['sample_idx']
utils.py (project: Learning-to-navigate-without-a-map, author: ToniRV)
def create_grid_8_dataset(mat_file_path, db_name, save_dir):
    """Convert grid 8x8 dataset from mat file to hdf5 format.

    Parameters
    ----------
    mat_file_path : str
        the path to the mat file
    db_name : str
        the name of the dataset
    save_dir : str
        the directory of the output path (must exist)
    """
    # load matlab data
    mat_data = load_mat_data(mat_file_path)
    print ("[MESSAGE] The Matlab data is loaded.")

    # init HDF5 database
    db = init_h5_db(db_name, save_dir)

    # save dataset
    for key in data_dict:
        add_h5_ds(mat_data[key], key, db)
        print ("[MESSAGE] Saved %s" % (key))

    db.flush()
    db.close()
    print ("[MESSAGE] The grid 8x8 dataset is saved at %s" % (save_dir))
utils.py (project: keras-text, author: raghakot)
def load(file_name):
    if file_name.endswith('.json'):
        with open(file_name, 'r') as f:
            return jsonpickle.loads(f.read())

    if file_name.endswith('.npy'):
        return np.load(file_name)

    return joblib.load(file_name)
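
Usage sketch with placeholder file names: the helper dispatches on the extension, so the same call covers jsonpickle text files, raw .npy arrays, and joblib pickles.

tokenizer = load("tokenizer.json")  # jsonpickle-encoded object
weights = load("embeddings.npy")    # plain numpy array via np.load
model = load("classifier.pkl")      # everything else goes through joblib.load
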
nb_utils.py (project: third_person_im, author: bstadie)
def _load_experiments(self, data_folder, name_or_patterns):
        if not isinstance(name_or_patterns, (list, tuple)):
            name_or_patterns = [name_or_patterns]
        files = []
        for name_or_pattern in name_or_patterns:
            matched_files = glob(
                osp.join(data_folder, name_or_pattern))  # glob gives a list of all files satisfying pattern
            files += matched_files  # this will include twice the same file if it satisfies 2 patterns
        experiments = []
        progress_f = None
        params_f = None
        pkl_data = None
        for f in files:
            if os.path.isdir(f):
                try:
                    progress = self._read_data(osp.join(f, "progress.csv"))
                    params = self._read_params(osp.join(f, "params.json"))
                    params["exp_name"] = osp.basename(f)
                    if os.path.isfile(osp.join(f, "params.pkl")):
                        pkl_data = joblib.load(osp.join(f, "params.pkl"))
                        experiments.append(Experiment(progress, params, pkl_data))
                    else:
                        experiments.append(Experiment(progress, params))
                except Exception as e:
                    print(e)
            elif 'progress.csv' in f:  # in case you're giving as datafolder the dir that contains the files!
                progress_f = self._read_data(f)
            elif 'params.json' in f:
                params_f = self._read_params(f)
            elif 'params.pkl' in f:
                print('about to load', f)
                pkl_data = joblib.load(f)
        if params_f and progress_f:
            if pkl_data:
                experiments.append(Experiment(progress_f, params_f, pkl_data))
            else:
                experiments.append(Experiment(progress_f, params_f))

        self._experiments = experiments
pysax.py (project: motif-classify, author: macks22)
def symbolize_signal(self, signal, parallel = None, n_jobs = -1):
        """
        Symbolize whole time-series signal to a sentence (vector of words),
        parallel can be {None, "ipython"}
        """
        window_index = self.sliding_window_index(len(signal))
        if parallel == None:
            return map(lambda wi: self.symbolize_window(signal[wi]), window_index)
        elif parallel == "ipython":
            ## too slow
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
            #return self.iparallel_symbolize_signal(signal)
        elif parallel == "joblib":
            with tempfile.NamedTemporaryFile(delete=False) as f:
                tf = f.name
            print "save temp file at %s" % tf 
            tfiles = joblib.dump(signal, tf)
            xs = joblib.load(tf, "r")
            n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs 
            window_index = list(window_index)
            batch_size = len(window_index) // n_jobs
            batches = chunk(window_index, batch_size)
            symbols = Parallel(n_jobs)(delayed(joblib_symbolize_window)(self, xs, batch) for batch in batches)
            for f in tfiles: os.unlink(f)
            return sum(symbols, [])
        else:
            raise NotImplementedError("parallel parameter %s not supported" % parallel)
pysax.py (project: motif-classify, author: macks22)
def signal_to_paa_vector(self, signal, n_jobs = -1):
        window_index = self.sliding_window_index(len(signal))
        with tempfile.NamedTemporaryFile(delete=False) as f:
                tf = f.name
        print "save temp file at %s" % tf 
        tfiles = joblib.dump(signal, tf)
        xs = joblib.load(tf, "r")
        n_jobs = joblib.cpu_count() if n_jobs == -1 else n_jobs 
        window_index = list(window_index)
        batch_size = len(window_index) // n_jobs
        batches = chunk(window_index, batch_size)
        vecs = Parallel(n_jobs)(delayed(joblib_paa_window)(self, xs, batch) for batch in batches)
        for f in tfiles: os.unlink(f)
        return np.vstack(vecs)
document_scores.py (project: word2vec_pipeline, author: NIHOPA)
def save_single(self):

        assert(self.V is not None)
        assert(self._ref is not None)

        # Set the size explicitly as a sanity check
        size_n, dim_V = self.V.shape

        config_score = simple_config.load()["score"]
        f_db = os.path.join(
            config_score["output_data_directory"],
            config_score["document_scores"]["f_db"]
        )

        h5 = touch_h5(f_db)
        g  = h5.require_group(self.method)
        gx = g.require_group(self.current_filename)

        # Save the data array
        msg = "Saving {} {} ({})"
        print(msg.format(self.method, self.current_filename, size_n))

        for col in ["V", "_ref", "VX",
                    "VX_explained_variance_ratio_",
                    "VX_components_"]:
            if col in gx:
                #print "  Clearing", self.method, self.current_filename, col
                del gx[col]

        gx.create_dataset("V", data=self.V, **self.h5py_args)
        gx.create_dataset("_ref", data=self._ref, **self.h5py_args)
document_scores.py (project: word2vec_pipeline, author: NIHOPA)
def compute_reduced_representation(self):

        if not self.compute_reduced:
            return None

        config_score = simple_config.load()["score"]
        f_db = os.path.join(
            config_score["output_data_directory"],
            config_score["document_scores"]["f_db"]
        )

        h5 = touch_h5(f_db)
        g = h5[self.method]

        keys = g.keys()
        V     = np.vstack([g[x]["V"][:] for x in keys])
        sizes = [g[x]["_ref"].shape[0] for x in keys]

        nc = self.reduced_n_components
        clf = IncrementalPCA(n_components=nc)

        msg = "Performing PCA on {}, ({})->({})"
        print(msg.format(self.method, V.shape[1], nc))

        VX = clf.fit_transform(V)
        EVR = clf.explained_variance_ratio_
        COMPONENTS = clf.components_

        for key, size in zip(keys, sizes):

            # Take slices equal to the size
            vx, VX = VX[:size,:], VX[size:, :]
            evr, EVR = EVR[:size], EVR[size:]
            com, COMPONENTS = COMPONENTS[:size,:], COMPONENTS[size:, :]

            g[key].create_dataset("VX", data=vx, **self.h5py_args)
            g[key].create_dataset("VX_explained_variance_ratio_", data=evr)
            g[key].create_dataset("VX_components_", data=com)

        h5.close()

