Python read_excel() usage examples, collected from project source code
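
Before the project snippets, a minimal sketch of the call they all build on ('data.xlsx' is a placeholder path; the snippets below use the older sheetname keyword, which newer pandas spells sheet_name):

import pandas as pd

# Read the first sheet; header=0 takes the first row as column names.
df = pd.read_excel('data.xlsx', sheetname=0, header=0)

# A list of sheet indices returns a dict keyed by those indices,
# which is why several snippets below index the result with [0].
sheets = pd.read_excel('data.xlsx', sheetname=[0, 1], header=0)
first = sheets[0]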

test_excel.py (project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia)
def test_datetimes(self):

        # Test writing and reading datetimes. For issue #9139. (xref #9185)
        _skip_if_no_xlrd()

        datetimes = [datetime(2013, 1, 13, 1, 2, 3),
                     datetime(2013, 1, 13, 2, 45, 56),
                     datetime(2013, 1, 13, 4, 29, 49),
                     datetime(2013, 1, 13, 6, 13, 42),
                     datetime(2013, 1, 13, 7, 57, 35),
                     datetime(2013, 1, 13, 9, 41, 28),
                     datetime(2013, 1, 13, 11, 25, 21),
                     datetime(2013, 1, 13, 13, 9, 14),
                     datetime(2013, 1, 13, 14, 53, 7),
                     datetime(2013, 1, 13, 16, 37, 0),
                     datetime(2013, 1, 13, 18, 20, 52)]

        with ensure_clean(self.ext) as path:
            write_frame = DataFrame.from_items([('A', datetimes)])
            write_frame.to_excel(path, 'Sheet1')
            read_frame = read_excel(path, 'Sheet1', header=0)

            tm.assert_series_equal(write_frame['A'], read_frame['A'])

    # GH7074
shixin_zhixing_defend365.py (project: forward, author: yajun0601)
def pingan_trust():
    result = get_total(start_date=START_DATE, end_date='2016-05-10')
#    issuers = pd.read_excel('????-SW??-????.xlsx', sheetname=[0], header=0)[0]
#    issuers = my_db.getCompanyList()
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    issuers.columns = ['name']
    focus = issuers.merge(result, on='name', how='left')

    focus = focus.sort_values('??', axis=0, ascending=False)
    import time
    time_str = time.strftime('%Y%m%d', time.localtime(time.time()))
    focus['rptDate'] = time_str

    insert_into_db(focus)

    report = focus.dropna(axis=0, thresh=3)  # keep rows with at least 3 non-null values
    report.to_excel("shixin_zhixing_bank.xlsx")
shixin_zhixing_defend_PINGAN.py (project: forward, author: yajun0601)
def pingan_trust():
    result = get_total(start_date=START_DATE, end_date='2016-05-10')
#    issuers = pd.read_excel('????-SW??-????.xlsx', sheetname=[0], header=0)[0]
#    issuers = my_db.getCompanyList()
    issuers = pd.read_excel('../peace/??????.xlsx', sheetname=[0], header=0)[0]
    issuers.columns = ['name']
    focus = issuers.merge(result, on='name', how='left')

    focus = focus.sort_values('??', axis=0, ascending=False)
    import time
    time_str = time.strftime('%Y%m%d', time.localtime(time.time()))
    focus['rptDate'] = time_str

    insert_into_db(focus)

    report = focus.dropna(axis=0, thresh=3)  # keep rows with at least 3 non-null values
    report.to_excel("shixin_zhixing_bank.xlsx")
modle.py (project: forward, author: yajun0601)
def getModle():
    fixedEvaluation = pd.read_excel(MODLE_FILE_NAME, sheetname=[0], header=0, skiprows=[0])
    industryTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[1], index_col=2, parse_cols="B:L", header=3)
    trendTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[2], header=2, skiprows=[0])
    fluctuationTbl = pd.read_excel(MODLE_FILE_NAME, sheetname=[3], header=2, skiprows=[0])
    fixedScoreTble = pd.read_excel(MODLE_FILE_NAME, sheetname=[4], header=0, skiprows=[0])

    df = pd.read_excel(DATA_FILE, sheetname=[1], header=0, index_col=0, verbose=True)
    df[1].head().index
    df[1].head().columns

    df[1].head().describe()
    df[1].head().loc[:, ['????', '?????']]

    for i in range(df[1].head().iloc[1].count()):
        print(df[1].head().iloc[1][i])

    head = df[1].head()
    head.values[0][1:40].reshape(13, 3)
wind_plugin.py (project: PythonPackages, author: wanhanwan)
def get_history_bar(field_names, start_date, end_date, **kwargs):
    field_info = pd.read_excel(argInfoWB, sheetname='????', engine='xlrd')
    if not isinstance(field_names, list):
        field_names = [field_names]
    # look up the Wind field name and call arguments for each factor
    _l = []
    w.start()
    for fieldName in field_names:
        field_name = field_info[field_info['FactorName'] == fieldName]['FieldName'].iat[0]
        args = field_info[field_info['FactorName'] == fieldName]['Args'].iat[0]

        params = _parse_args(args, **kwargs)
        all_days = data_api.tc.get_trade_days(start_date, end_date)
        all_ids = data_api.get_history_ashare(all_days).index.levels[1].unique()

        data = w.wsd(
            list(map(tradecode_to_windcode, all_ids)), field_name, start_date, end_date, params)
        _l.append(_bar_to_dataframe(data))
    data = pd.concat(_l,axis=1)
    w.close()
    return data
fileio.py (project: spice-hate_speech_detection, author: futurice)
def read_annotated_files(dirname):

    messages = []
    labels = np.zeros(0)

    filenames = glob.glob(os.path.join(dirname, '*.xls*'))

    for filename in filenames:
        print('Reading %s' % filename, end='. ', flush=True)

        df = pd.read_excel(filename)
        print("Found %d new samples" % df[df.LABEL.notnull()].shape[0])
        labels = np.hstack((labels, np.array(df[df.LABEL.notnull()].LABEL.tolist(),
                           dtype=int)))
        messages += df[df.LABEL.notnull()].text.tolist()


    return messages, labels
test_monitoring.py (project: pecos, author: sandialabs)
def setUp(self):
        trans = {
            'Linear': ['A'],
            'Random': ['B'],
            'Wave': ['C','D']}

        system_name = 'Simple'
        file_name = join(simpleexampledir,'simple.xlsx')

        df = pd.read_excel(file_name)
        self.pm = pecos.monitoring.PerformanceMonitoring()
        self.pm.add_dataframe(df, system_name)
        self.pm.add_translation_dictionary(trans, system_name)
        self.pm.check_timestamp(900)
        clock_time = self.pm.get_clock_time()
        time_filter = (clock_time > 3*3600) & (clock_time < 21*3600)
        self.pm.add_time_filter(time_filter)
cross_val_splits.py (project: qtim_ROP, author: QTIM-Lab)
def unet_cross_val(data_dir, out_dir, mapping, splits, unet_conf):

    # Load spreadsheet
    with pd.ExcelFile(mapping) as xls:
        df = pd.read_excel(xls, 'Sheet1').set_index('index')
        df['class'] = df['class'].map({'preplus': 'pre-plus', 'normal': 'normal', 'plus': 'plus'})

    img_dir = join(data_dir, 'images')
    seg_dir = join(data_dir, 'manual_segmentations')
    mask_dir = join(data_dir, 'masks')

    # Check whether all images exist
    check_images_exist(df, img_dir, seg_dir, mask_dir)

    # Now split into training and testing
    CVFile = sio.loadmat(splits)

    # # Combining Pre-Plus and Plus
    # trainPlusIndex = CVFile['trainPlusIndex'][0]
    # testPlusIndex = CVFile['testPlusIndex'][0]
    #
    # plus_dir = make_sub_dir(out_dir, 'trainTestPlus')
    # print "Generating splits for combined No and Pre-Plus"
    # generate_splits(trainPlusIndex, testPlusIndex, df, img_dir, mask_dir, seg_dir, plus_dir)

    # Combining No and Pre-Plus
    trainPrePIndex = CVFile['trainPrePIndex'][0]
    testPrePIndex = CVFile['testPrePIndex'][0]

    prep_dir = make_sub_dir(out_dir, 'trainTestPreP')
    print "Generating splits for combined Pre-Plus and Plus"
    generate_splits(trainPrePIndex, testPrePIndex, df, img_dir, mask_dir, seg_dir, prep_dir)

    # Train models
    train_and_test(prep_dir, unet_conf, processes=1)
    # train_and_test(plus_dir, unet_conf, processes=2)
pca_augmentation.py (project: qtim_ROP, author: QTIM-Lab)
def pca_augmentation(data_h5, excel_path):

    f = h5py.File(data_h5, 'r')

    df1 = pd.read_excel(excel_path, sheetname=0, header=1)
    df1 = df1.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace

    df2 = pd.read_excel(excel_path, sheetname=1, header=1)
    df2 = df2.rename(columns=lambda x: x.strip()).set_index('Image')  # strip whitespace
    df = pd.concat([df1, df2])

    X = preprocess_data(f)
    X_mean = np.mean(X, axis=0)
    X = X - X_mean

    # PCA
    pca = PCA().fit(X)
DataFrameModel.py (project: qtpandas, author: draperjames)
def setDataFrameFromFile(self, filepath, **kwargs):
        """
        Sets the model's dataFrame by reading a file.
        Accepted file formats:
            - .xlsx (sheet1 is read unless specified in kwargs)
            - .csv (comma separated unless specified in kwargs)
            - .txt (any separator)

        :param filepath: (str)
            The path to the file to be read.
        :param kwargs:
            Keyword arguments forwarded to pandas.read_csv or pandas.read_excel.
        :return: None
        """
        df = superReadFile(filepath, **kwargs)
        self.setDataFrame(df, filePath=filepath)
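
A usage sketch (the instance construction and file names are illustrative, not taken from qtpandas; extra kwargs are simply forwarded to the matching pandas reader):

model = DataFrameModel()
model.setDataFrameFromFile('scores.csv', sep=';')   # forwarded to pandas.read_csv
model.setDataFrameFromFile('scores.xlsx')           # first sheet via pandas.read_excel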
settings.py (project: smalldata, author: dannguyen)
def get_dataset_rows(meta):
    """

    if filename is a xlsx file, returns a list of list of dicts
    else: returns a list of dicts

    relies on get_dataset_local_filename() and meta['slug']

    right now wishing I had made this all OOP...
    """
    srcpath = meta['local_filepath']
    if meta['filetype'] == 'workbook':
        import pandas as pd
        sheetindices = list(range(len(meta['gsheet']['sheets'])))
        dfs = pd.read_excel(str(srcpath), sheetname=sheetindices)
        return [x.to_dict('records') for x in dfs.values()]
    else:
        # assume CSV
        return list(DictReader(srcpath.open('r')))
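
The workbook branch depends on read_excel returning a dict when handed a list of sheet indices; a minimal sketch of that contract ('book.xlsx' is a placeholder):

import pandas as pd

sheetindices = [0, 1]  # one entry per sheet in the workbook
dfs = pd.read_excel('book.xlsx', sheetname=sheetindices)  # dict: sheet index -> DataFrame
rows = [df.to_dict('records') for df in dfs.values()]     # list of lists of dicts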
main.py (project: Travelling-salesman-problem-using-Ant-colony-optimization, author: Ahsaan-566)
def initGraph(self, name):
        #file reading using pandas
        df = pd.read_excel(name, sheetname='Sheet1')
        dim = df['Dimension']
        x = df['X']
        y = df['Y']
        #print x,y
        self.numCities = len(dim)
        #set and fill the adjMatrix
        self.adjMatrix = [[1.0 for i in range(self.numCities)] for j in range(self.numCities) ]
        for i in range(self.numCities):
            for j in range(self.numCities):
        #fill the adjmatrix with city coordinates and calculate euclidean distances
                self.adjMatrix[i][j] = self.calEdge(x[i], x[j], y[i], y[j])

    #calculating edge weights using euclidean distances
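
calEdge itself is not part of this excerpt; a plausible sketch of the Euclidean edge weight the comment describes (an assumption, matching the call calEdge(x[i], x[j], y[i], y[j])):

import math

def calEdge(self, x1, x2, y1, y2):
    # straight-line distance between city i at (x1, y1) and city j at (x2, y2)
    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)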
extract_feature.py (project: Book_DeepLearning_Practice, author: wac81)
def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
    contents = pd.read_excel(file_name, header=None)
    cw = lambda x: [word.encode('utf-8') for word in jieba.cut(x) if word not in stopwords and word.strip() != '' and word.encode('utf-8') in dictionary.index]
    contents['words'] = contents[col_content].apply(cw)
    get_sent = lambda x: list(dictionary['id'][x])
    contents['sent'] = contents['words'].apply(get_sent)  # map each word to its dictionary id
    print("Pad sequences (samples x time)")
    contents['sent'] = list(sequence.pad_sequences(contents['sent'], maxlen=length))
    x = np.array(list(contents['sent']))  # ???
    y = np.zeros((len(list(contents[col_tag])), tag_num))
    for i in range(len(list(contents[col_tag]))):
        for j in range(tag_num):
            if contents[col_tag][i] == j:
                y[i][j] = 1
    return x, y
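
The label loop above is a one-hot encoding; an equivalent vectorized form (assuming the tags are integer class ids in [0, tag_num)):

import numpy as np

tags = np.asarray(list(contents[col_tag]), dtype=int)
y = np.eye(tag_num)[tags]  # same one-hot matrix as the nested loop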


# dictionary-model feature extraction
extract_feature.py (project: Book_DeepLearning_Practice, author: wac81)
def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
    # load the sentiment vocabularies
    adv = codecs.open('./data/vocabulary/adv.txt', 'rb', encoding='utf-8').read().split('\n')
    inverse = codecs.open('./data/vocabulary/inverse.txt', 'rb', encoding='utf-8').read().split('\n')
    negdict = codecs.open('./data/vocabulary/negdict.txt', 'rb', encoding='utf-8').read().split('\n')
    posdict = codecs.open('./data/vocabulary/posdict.txt', 'rb', encoding='utf-8').read().split('\n')

    contents = pd.read_excel(file_name, header=None)

    print 'cut words...'
    cw = lambda x: [pair for pair in psg.lcut(x) if pair.word not in stopwords]
    contents['pairs'] = contents[col_content].apply(cw)
    matrix = reviews2matrix(list(contents['pairs']), posdict, negdict, inverse, adv)
    x = matrix2vec(matrix)
    y = list(contents[col_tag])
    return x, y
JJS.py (project: spider, author: devzhan)
def read():
    df = pd.read_excel("jjs1.xlsx")
    data = list(df.ix[:, 7])
    prices = []
    for item in data:
        if not str(item) == "nan":
            prices.append(int(item))
    print(prices[0:100])

    matplotlib.style.use('ggplot')  # use the ggplot style

    ts = pd.Series(prices[0:100], index=pd.date_range('1/1/2000', periods=100))
    plt.figure()
    ts.plot.hist(alpha=0.5)
    plt.legend()
    plt.show()
sentiment_svm.py (project: patriots, author: wdxtub)
def loadfile():
    neg = pd.read_excel(datadir + '/neg.xls', header=None, index_col=None)
    pos = pd.read_excel(datadir + '/pos.xls', header=None, index_col=None)

    cw = lambda x: list(jieba.cut(x))
    pos['words'] = pos[0].apply(cw)
    neg['words'] = neg[0].apply(cw)

    # print pos['words']
    # use 1 for positive sentiment, 0 for negative
    y = np.concatenate((np.ones(len(pos)), np.zeros(len(neg))))

    x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos['words'], neg['words'])), y, test_size=0.2)

    np.save(modeldir + '/y_train.npy', y_train)
    np.save(modeldir + '/y_test.npy', y_test)
    return x_train, x_test



# average the word vectors of each sentence to build a sentence vector
classifying.py (project: stock, author: datablood)
def get_hs300s():
    """
    Get the current HS300 (CSI 300) index constituents and their weights.
    Return
    --------
    DataFrame
        code : stock code
        name : stock name
        date : date
        weight : index weight
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
classifying.py (project: stock, author: datablood)
def get_sz50s():
    """
    Get the SZ50 (SSE 50) index constituents.
    Return
    --------
    DataFrame
        code : stock code
        name : stock name
    """
    try:
        df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['sz50b']), parse_cols=[0, 1])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
classifying.py (project: stock, author: datablood)
def get_zz500s():
    """
    Get the ZZ500 (CSI 500) index constituents.
    Return
    --------
    DataFrame
        code : stock code
        name : stock name
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
#         df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
#                                                         ct.PAGES['zz500b']), parse_cols=[0, 1])
#         df.columns = ct.FOR_CLASSIFY_B_COLS
#         df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP % (ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                        ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
methods.py (project: array_storage_benchmark, author: mverleg)
def load(self, pth):
        with open(pth, 'rb') as fh:  # binary mode: Excel files are not text
            data = read_excel(fh, sheetname='data')
            print(data.as_matrix(columns=data.columns[1:]))
            return data.as_matrix()



#todo: pandas formats - http://pandas.pydata.org/pandas-docs/stable/io.html
# hdf5
# sql

#todo: hdf5 - http://stackoverflow.com/a/9619713/723090

#todo: bloscpack http://stackoverflow.com/a/22225337/723090

#todo: pytables
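
For the pandas-format todos above, a minimal sketch of the HDF5 round-trip (assuming the pytables package is installed; 'store.h5' and the key name are placeholders):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(100, 3), columns=list('abc'))
df.to_hdf('store.h5', key='data', mode='w')  # backed by pytables
back = pd.read_hdf('store.h5', key='data')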

