def load_pkl():
'''
loads a pickled DataFrame with the employers to scrape ratings for.
INPUT:
None
OUTPUT:
df: pandas DataFrame
split: threshold of good/bad employer ratings
'''
df = pd.read_pickle(os.path.join('data', 'clean_employers.pkl'))
df['company_id'] = df['company_id'].astype(int)
df['num_ratings'] = df['num_ratings'].astype(int)
split = df['overall_rating'].mean()
return df, split
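Every snippet collected on this page revolves around the same round trip: DataFrame.to_pickle serializes a pandas object to disk and pd.read_pickle restores it unchanged. A minimal, self-contained sketch (the file name is illustrative):

import pandas as pd

df = pd.DataFrame({'company_id': [1, 2], 'overall_rating': [3.9, 4.2]})
df.to_pickle('example.pkl')               # write the frame to disk
restored = pd.read_pickle('example.pkl')  # read it back with dtypes and index intact
assert restored.equals(df)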
Python read_pickle() usage examples
scrape_ratings_threaded.py (project: glassdoor-analysis, author: THEdavehogue)
def _load_o2p(self):
if self.o2p:
return
path = self.flags.data_path
p = "%s/o2p.pkl"%path
if not os.path.exists(p):
self._load_db()
ops = self.pdDB.data['op_prior']
ops = pd.concat([ops, self.pdDB.data['op_train']])  # DataFrame.append was removed in pandas 2.0
o2p = ops.sort_values(['order_id', 'add_to_cart_order'])\
.groupby('order_id')['product_id'].apply(list)
o2p.to_pickle(p)
else:
o2p = pd.read_pickle(p)
self.o2p = o2p
print_mem_time("Loaded o2p %d"%len(o2p))
def compute_cell_smushing(self):
"""Within each plate, find a 2d embedding of all cells"""
grouped = self.genes.groupby(self.cell_metadata[self.SAMPLE_MAPPING])
if os.path.exists(self.cell_smushed_cache_file):
smusheds = pd.read_pickle(self.cell_smushed_cache_file)
# if nothing is missing, return the cached version
if not set(grouped.groups) - set(smusheds):
return smusheds
else:
smusheds = {}
for plate_name, genes_subset in grouped:
if plate_name not in smusheds:
cell_smusher = TSNE(metric='cosine', random_state=0)
cell_smushed = pd.DataFrame(
cell_smusher.fit_transform(genes_subset),
index=genes_subset.index)
smusheds[plate_name] = cell_smushed
pd.to_pickle(smusheds, self.cell_smushed_cache_file)
return smusheds
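compute_cell_smushing above is one instance of a load-or-compute caching idiom that recurs throughout these examples. A stripped-down version of the pattern, with hypothetical names, might look like:

import os
import pandas as pd

def load_or_compute(cache_path, compute_fn):
    """Return the cached object if the pickle exists, otherwise compute and cache it."""
    if os.path.exists(cache_path):
        return pd.read_pickle(cache_path)
    result = compute_fn()
    pd.to_pickle(result, cache_path)  # pandas.to_pickle accepts any picklable object
    return result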
def fit_behavioral_data():
"""Fit a model for all subjects. """
df = pd.read_pickle('data.pkl')
subjects = df.index.get_level_values('subject').unique()
data = np.empty((subjects.size, 10))
cues = (0, 1)
for i, subject in enumerate(subjects):
print('Fitting model for subject {}'.format(subject))
df_s = df.loc[subject]
for cue in cues:
ml = ML(df_s[df_s['cue']==cue])
r = ml.ml_estimation()
data[i,2*cue:(2*cue+2)] = r.x
data[i,2*cue+4:2*cue+6] = np.sqrt(np.diag(r.hess_inv.todense()))
data[i,cue+8] = r.fun
model = pd.DataFrame(data, pd.Index(subjects, name='subject'),
['alpha_0', 'beta_0', 'alpha_1', 'beta_1',
'se_alpha_0', 'se_beta_0', 'se_alpha_1', 'se_beta_1',
'NLL_0', 'NLL_1'])
return model
def fit_single_subject(subject=4):
df = pd.read_pickle('data.pkl')
print('Fitting model for subject {}'.format(subject))
df_s = df.loc[subject]
cues = (0, 1, 2)
for cue in cues:
ml = ML(df_s[df_s['cue']==cue])
r = ml.ml_estimation()
H_inv = r.hess_inv.todense()
print('\t cue:{:d}'.format(cue))
print('\t\tr:\n\t\t\t{}\n'.format(r.x))
print('\tInverse of Hessian:\n{}\n'.format(H_inv))
globals().update(locals())
def _load_table(self, filepath):
"""
Load table from file system.
:param str filepath: Path to table in CSV, TSV, XLSX or
Pandas pickle format.
:return: Pandas table
:rtype: pandas.core.frame.DataFrame
"""
_, ext = os.path.splitext(filepath.lower())
if ext == '.tsv':
return pd.read_table(filepath, **self.kwargs)
if ext == '.csv':
return pd.read_csv(filepath, **self.kwargs)
if ext == '.xlsx':
return pd.read_excel(filepath, **self.kwargs)
return pd.read_pickle(filepath, **self.kwargs)
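_load_table dispatches on the file extension and falls back to pd.read_pickle for anything unrecognized. The same idea works as a standalone helper; a sketch, assuming the caller passes keyword arguments appropriate to the chosen reader:

import os
import pandas as pd

def read_any_table(filepath, **kwargs):
    """Choose a pandas reader from the file extension, defaulting to read_pickle."""
    ext = os.path.splitext(filepath.lower())[1]
    readers = {'.tsv': pd.read_table, '.csv': pd.read_csv, '.xlsx': pd.read_excel}
    return readers.get(ext, pd.read_pickle)(filepath, **kwargs)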
test_multi.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_legacy_pickle(self):
if PY3:
raise nose.SkipTest("testing for legacy pickles not "
"support on py3")
path = tm.get_data_path('multiindex_v1.pickle')
obj = pd.read_pickle(path)
obj2 = MultiIndex.from_tuples(obj.values)
self.assertTrue(obj.equals(obj2))
res = obj.get_indexer(obj)
exp = np.arange(len(obj))
assert_almost_equal(res, exp)
res = obj.get_indexer(obj2[::-1])
exp = obj.get_indexer(obj[::-1])
exp2 = obj2.get_indexer(obj2[::-1])
assert_almost_equal(res, exp)
assert_almost_equal(exp, exp2)
test_multi.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_legacy_v2_unpickle(self):
# 0.7.3 -> 0.8.0 format manage
path = tm.get_data_path('mindex_073.pickle')
obj = pd.read_pickle(path)
obj2 = MultiIndex.from_tuples(obj.values)
self.assertTrue(obj.equals(obj2))
res = obj.get_indexer(obj)
exp = np.arange(len(obj))
assert_almost_equal(res, exp)
res = obj.get_indexer(obj2[::-1])
exp = obj.get_indexer(obj[::-1])
exp2 = obj2.get_indexer(obj2[::-1])
assert_almost_equal(res, exp)
assert_almost_equal(exp, exp2)
test_categorical.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_pickle_v0_14_1(self):
# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_14_1.pickle')
# This code was executed once on v0.14.1 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
self.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
test_categorical.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_pickle_v0_15_2(self):
# ordered -> _ordered
# GH 9347
# we have the name warning
# 10482
with tm.assert_produces_warning(UserWarning):
cat = pd.Categorical(values=['a', 'b', 'c'],
categories=['a', 'b', 'c', 'd'],
name='foobar', ordered=False)
pickle_path = os.path.join(tm.get_data_path(),
'categorical_0_15_2.pickle')
# This code was executed once on v0.15.2 to generate the pickle:
#
# cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'],
# name='foobar')
# with open(pickle_path, 'wb') as f: pickle.dump(cat, f)
#
self.assert_categorical_equal(cat, pd.read_pickle(pickle_path))
test_pickle.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def compare(self, vf, version):
# py3 compat when reading py2 pickle
try:
data = pandas.read_pickle(vf)
except (ValueError) as e:
if 'unsupported pickle protocol:' in str(e):
# trying to read a py3 pickle in py2
return
else:
raise
for typ, dv in data.items():
for dt, result in dv.items():
try:
expected = self.data[typ][dt]
except (KeyError):
continue
# use a specific comparator
# if available
comparator = getattr(self, "compare_{typ}_{dt}".format(
typ=typ, dt=dt), self.compare_element)
comparator(result, expected, typ, version)
return data
def thunder():
if os.path.exists('../dataset/thunder.pkl'):
return pd.read_pickle('../dataset/thunder.pkl')
thunder_df = pd.read_csv('../input/thunder.csv',
names=[
'datetime', # observation timestamp
'lat', # latitude (decimal degrees)
'lon', # longitude (decimal degrees)
'type' # discharge type; CG: cloud-to-ground, IC: intra-cloud
])
# parse the datetime strings into pandas Timestamps
thunder_df.datetime = pd.to_datetime(thunder_df.datetime)
# observation_point_df.to_pickle('../dataset/observation_point.pkl')
thunder_df = pd.concat([thunder_df, pd.get_dummies(thunder_df.type)], axis=1)
thunder_df.to_pickle('../dataset/thunder.pkl')  # cache under the same path checked at the top of the function
return thunder_df
def __init__(self, baseDir='../temp/repo'):
'''baseDir: directory where the pickled tables are stored.'''
self.dir = baseDir
self.data = {}
if not os.path.exists(self.dir):
os.makedirs(self.dir)
logging.info('Created data directory: %s' % self.dir)
# load every pickle file found in the directory into memory as a table
for p in os.listdir(self.dir):
if os.path.isfile(
os.path.join(self.dir, p)):
key = re.split(r'\.', p)[0]  # filename without extension (the dot must be escaped)
path = os.path.join(self.dir, p)
t = pd.read_pickle(path)
logging.info('Loaded %s as %s.' % (path, key))
self.data[key] = t
def read_models_from_dir(dir):
models = glob.glob(dir + '/*/')
selected_models = [m for m in models if 'bag' not in m]
print(selected_models)
bagged_oobs = []
bagged_preds = []
for model in selected_models:
pred_file = model + '/' + 'preds.csv'
oob_file = model + '/' + 'oob.pkl'
oob = pd.read_pickle(oob_file)
preds = pd.read_csv(pred_file)
preds['ut_ms'] = pd.to_datetime(preds['ut_ms'], unit='ms')
preds=preds.set_index('ut_ms')
bagged_oobs.append(oob)
bagged_preds.append(preds)
return bagged_oobs, bagged_preds, selected_models
def read_models_from_dir(dir):
model_array = []
models = glob.glob(dir + '/*/')
selected_models = [m for m in models if 'bag' not in m]
print(selected_models)
for model in selected_models:
try:
pred_file = model + '/' + 'preds.csv'
oob_file = model + '/' + 'oob.pkl'
oob = pd.read_pickle(oob_file)
cols = [model + str(i) for i in oob.columns]
print(model, oob.shape)
preds = pd.read_csv(pred_file)
preds['ut_ms'] = pd.to_datetime(preds['ut_ms'], unit='ms')
preds = preds.set_index('ut_ms')
model_array.append((Model(model, oob, preds, RMSE(target.loc[oob.index], oob))))
except Exception:
print("Error!", model)
return model_array
def load():
global user_order, goods, pname2id, model
user_order = pd.read_pickle('../input/mk/user_order.p')
goods = pd.read_pickle('../input/mk/goods.p')
pname2id = {}
for k,v in zip(goods.product_name, goods.product_id):
pname2id[k] = v
model = load_instacart_vec()
print('Activated utils.vec2pids, utils.pnames2ids')
return
def make(T):
"""
T = 0
folder = 'trainT-0'
"""
if T==-1:
folder = 'test'
else:
folder = 'trainT-'+str(T)
label = pd.read_pickle('../feature/{}/label_reordered.p'.format(folder))
df = pd.merge(label[['order_id', 'product_id']],
tbl[['order_id', 'product_id','days_since_last_order_this_item']],
on=['order_id', 'product_id'], how='left')
df.to_pickle('../feature/{}/f303_order-product.p'.format(folder))
#==============================================================================
# main
#==============================================================================
def concat_pred_item(T, dryrun=False):
if T==-1:
name = 'test'
else:
name = 'trainT-'+str(T)
df = utils.load_pred_item(name)
df = pd.merge(df, pd.read_pickle('../feature/{}/f317_user-product.p'.format(name)),
on=['user_id', 'product_id'],how='left')
gc.collect()
#==============================================================================
print('output')
#==============================================================================
if dryrun == True:
return df
else:
utils.to_pickles(df, '../feature/{}/all_apdx'.format(name), 20, inplace=True)
def trainModel(self):
df = pd.read_pickle("./train_features.pkl")
x_df = pd.concat([df.iloc[:,4:6],df.iloc[:,8]],axis=1)
y_df = df.iloc[:,9]
print(x_df)
print(len(x_df))
print(len(y_df))
train_no = int(0.8 * len(df))
#train_no = 100000
print(train_no)
train_df = x_df.iloc[0:train_no,:]
train_labels = y_df.iloc[0:train_no]
test_df = x_df.iloc[train_no:,:]
test_labels = y_df.iloc[train_no:]
self.model = LogisticClassifier(3)
self.model.trainModel(train_df,train_labels)
self.model.validateModel(test_df,test_labels)
def get_answers_matrix(split):
if split == 'train':
data_path = 'data/train_qa'
elif split == 'val':
data_path = 'data/val_qa'
else:
print('Invalid split!')
sys.exit()
df = pd.read_pickle(data_path)
answers = df[['multiple_choice_answer']].values.tolist()
answer_matrix = np.zeros((len(answers),1001))
default_onehot = np.zeros(1001)
default_onehot[1000] = 1.0
for i, answer in enumerate(answers):
answer_matrix[i] = answer_to_onehot_dict.get(answer[0].lower(),default_onehot)
return answer_matrix
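get_answers_matrix relies on a module-level answer_to_onehot_dict mapping the most frequent answers to one-hot rows, with index 1000 reserved as the catch-all for unseen answers. One hypothetical way such a mapping could be built (the top_answers list is an assumption, not part of the original code):

import numpy as np

def build_answer_onehot_dict(top_answers):
    """Map each of the 1000 most frequent answers to a 1001-dim one-hot row."""
    onehot = {}
    for i, ans in enumerate(top_answers[:1000]):
        row = np.zeros(1001)
        row[i] = 1.0
        onehot[ans.lower()] = row
    return onehot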
def get_questions_matrix(split):
if split == 'train':
data_path = 'data/train_qa'
elif split == 'val':
data_path = 'data/val_qa'
else:
print('Invalid split!')
sys.exit()
df = pd.read_pickle(data_path)
questions = df[['question']].values.tolist()
word_idx = ebd.load_idx()
seq_list = []
for question in questions:
words = word_tokenize(question[0])
seq = []
for word in words:
seq.append(word_idx.get(word,0))
seq_list.append(seq)
question_matrix = pad_sequences(seq_list)
return question_matrix
def get_result_by_last_three_weeks_mean():
data = pd.read_pickle(static_params.DATA_PATH + 'user_pay_last_three_weeks.pkl')
result = pd.DataFrame(data['iid'])
date = '2016-11-'
index = 1
for index in range(1,8):
column = date + str(index)
result[column] = data.loc[:,['2016-10-' + str(index + 10),'2016-10-' + str(index + 17),'2016-10-' + str(index + 24)]].mean(1)
data2 = result.copy()
result = pd.merge(data2,result,on='iid')
result.iloc[:,-4] = result.iloc[:,-4]*1.2
result = result.astype(int)
result.to_csv(static_params.DATA_PATH + 'submission.csv',header=None,index=None)
def get_result_by_last_two_weeks_mean():
# predict each day as the mean of the same weekday in the last two weeks
data = pd.read_pickle(static_params.DATA_PATH + 'user_pay_last_two_weeks.pkl')
print(data)
result = pd.DataFrame(data['iid'])
date = '2016-11-'
index = 1
for index in range(1,8):
column = date + str(index)
result[column] = data.loc[:,['2016-10-' + str(index + 17),'2016-10-' + str(index + 24)]].mean(1)
data2 = result.copy()
result = pd.merge(data2,result,on='iid').astype(int)
result.to_csv(static_params.DATA_PATH + 'submission.csv',header=None,index=None)
def user_view_split_by_shop():
if(not os.path.exists(static_params.DATA_USER_VIEW_BY_SHOP_PATH)):
os.mkdir(static_params.DATA_USER_VIEW_BY_SHOP_PATH)
data = pd.read_pickle(static_params.DATA_PATH + 'user_view.pkl')
print(type(data))
data.columns = ['uid','iid','time']
print(data)
data['iid'] = data['iid'].astype(str)
data['time'] = data['time'].apply(get_date)
grouped = data.groupby(['iid'],as_index=False)
for name,group in grouped:
f = open(static_params.DATA_USER_VIEW_BY_SHOP_PATH + str(name) + '.pkl', 'wb')
cPickle.dump(group,f,-1)
f.close()
def get_extra_train():
##############################extra features##################################
train_simhash_features=pd.read_csv('data/extra_feature/train_simhash_features.csv')
train_selftrained_w2v_sim_dist=pd.read_pickle('data/extra_feature/train_selftrained_w2v_sim_dist.pkl')
train_selftrained_glove_sim_dist=pd.read_pickle('data/extra_feature/train_selftrained_glove_sim_dist.pkl')
train_pretrained_w2v_sim_dist=pd.read_pickle('data/extra_feature/train_pretrained_w2v_sim_dist.pkl')
train_distinct_word_stats_selftrained_glove=pd.read_csv('data/extra_feature/train_distinct_word_stats_selftrained_glove.csv')
train_distinct_word_stats_pretrained=pd.read_csv('data/extra_feature/train_distinct_word_stats_pretrained.csv')
train_distinct_word_stats=pd.read_csv('data/extra_feature/train_distinct_word_stats.csv')
X_train=np.hstack([train_simhash_features,
train_selftrained_w2v_sim_dist,
train_selftrained_glove_sim_dist,
train_pretrained_w2v_sim_dist,
train_distinct_word_stats_selftrained_glove,
train_distinct_word_stats_pretrained,
train_distinct_word_stats,])
print(X_train.shape)
return X_train
def get_extra_test():
##############################extra features##################################
test_simhash_features=pd.read_csv('data/extra_feature/test_simhash_features.csv')
test_selftrained_w2v_sim_dist=pd.read_pickle('data/extra_feature/test_selftrained_w2v_sim_dist.pkl')
test_selftrained_glove_sim_dist=pd.read_pickle('data/extra_feature/test_selftrained_glove_sim_dist.pkl')
test_pretrained_w2v_sim_dist=pd.read_pickle('data/extra_feature/test_pretrained_w2v_sim_dist.pkl')
test_distinct_word_stats_selftrained_glove=pd.read_csv('data/extra_feature/test_distinct_word_stats_selftrained_glove.csv')
test_distinct_word_stats_pretrained=pd.read_csv('data/extra_feature/test_distinct_word_stats_pretrained.csv')
test_distinct_word_stats=pd.read_csv('data/extra_feature/test_distinct_word_stats.csv')
X_test=np.hstack([ test_simhash_features,
test_selftrained_w2v_sim_dist,
test_selftrained_glove_sim_dist,
test_pretrained_w2v_sim_dist,
test_distinct_word_stats_selftrained_glove,
test_distinct_word_stats_pretrained,
test_distinct_word_stats,])
print(X_test.shape)
return X_test
def get_feature_importance(feature):
import scipy.stats as sps
import pandas as pd
y_train = pd.read_csv('../data/train.csv')['is_duplicate']
return sps.spearmanr(feature,y_train)[0]
# import pickle
# pickle.dump(X_train,open("data_train.pkl", 'wb'), protocol=2)
#
# data_file=['test_deptree','test_glove_sim_dist','test_pca_glove',
# 'test_pca_pattern','test_w2w','test_pos','test_pca_char']
#
# path='../test/'
# for it in range(6):
# tmp=[]
# flist=[item+str(it) for item in data_file]
# test=np.empty((400000,0))
# if it==5:
# test=np.empty((345796,0))
# for f in flist:
# test=np.hstack([test,pd.read_pickle(path+f+'.pkl')])
# pickle.dump(test,open('data_test{0}.pkl'.format(it),'wb'),protocol=2)
def split_cli():
p = ArgumentParser()
p.add_argument("expanded", default="expanded.pickle", help="Expanded pickle file targets.")
p.add_argument("stripped", default="test.pickle", help="stripped data filename")
p.add_argument("train", default="train.pickle", help="training filename")
p.add_argument("test", default="test.pickle", help="test filename")
p.add_argument("attrfile", default="attrs.txt", help="attrs to care about for NA purposes")
p.add_argument("--na-strategy", default="drop", help="what to do with NA rows (default is drop them)")
p.add_argument("--trainpct", default=70, type=int, help="percentage of data to put into training set")
p.add_argument("--random", action='store_true', help="split train/test sets randomly (default is by time)")
cfg = p.parse_args()
strip_and_process_to_files(expanded_file=pd.read_pickle(cfg.expanded),
stripped_file=cfg.stripped,
attrfile=cfg.attrfile,
na_strategy=cfg.na_strategy)
split_to_files(trainfile=cfg.train,
testfile=cfg.test,
stripped=cfg.stripped,
trainpct=cfg.trainpct,
split_randomly=cfg.random)
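As a usage sketch (the script name and file names are illustrative, not from the original project), the parser above would be invoked along these lines:

python split_dataset.py expanded.pickle stripped.pickle train.pickle test.pickle attrs.txt --trainpct 80 --random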
def load_nf_histplayerinfo(sport, identifiers_to_load):
"""
Load previously saved dataframes of numberfire prediction data.
:param str sport: which sport!
:param list[str] identifiers_to_load: id of players to load
:return dict[str, DataFrame]: dict of player -> prediction data for player
"""
loaded = 0
histplayerinfo_dict = {}
for identifier in identifiers_to_load:
target_file = get_histplayerinfo_filename(sport, identifier)
if os.path.exists(target_file):
histplayerinfo_dict[identifier] = pandas.read_pickle(target_file)
# Attempt to convert the index to time based if possible
if histplayerinfo_dict[identifier] is not None and 'date' in histplayerinfo_dict[identifier].columns:
histplayerinfo_dict[identifier].set_index('date', inplace=True)
loaded += 1
return histplayerinfo_dict
def load_nf_salaryinfo(sport, players):
"""
Load previously saved dataframes of numberfire salary data.
:param str sport: which sport!
:param list[str] players: players to load
:return dict[str, DataFrame]: dict of player -> salary data for player
"""
loaded = 0
player_dict = {}
for player in players:
target_file = get_salary_filename(sport, player)
if os.path.exists(target_file):
player_dict[player] = pandas.read_pickle(target_file)
# Attempt to convert the index to time based if possible
if player_dict[player] is not None and 'date' in player_dict[player].columns:
player_dict[player].set_index('date', inplace=True)
loaded += 1
return player_dict
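load_nf_histplayerinfo and load_nf_salaryinfo repeat the same read-then-reindex step. A small shared helper (a sketch; the function name is hypothetical) would keep the two loaders consistent:

import os
import pandas

def read_pickle_with_date_index(target_file):
    """Load a pickled frame and, when a 'date' column exists, promote it to the index."""
    if not os.path.exists(target_file):
        return None
    frame = pandas.read_pickle(target_file)
    if frame is not None and 'date' in frame.columns:
        frame.set_index('date', inplace=True)
    return frame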