def test_datetimes(self):
# Test writing and reading datetimes. For issue #9139. (xref #9185)
_skip_if_no_xlrd()
datetimes = [datetime(2013, 1, 13, 1, 2, 3),
datetime(2013, 1, 13, 2, 45, 56),
datetime(2013, 1, 13, 4, 29, 49),
datetime(2013, 1, 13, 6, 13, 42),
datetime(2013, 1, 13, 7, 57, 35),
datetime(2013, 1, 13, 9, 41, 28),
datetime(2013, 1, 13, 11, 25, 21),
datetime(2013, 1, 13, 13, 9, 14),
datetime(2013, 1, 13, 14, 53, 7),
datetime(2013, 1, 13, 16, 37, 0),
datetime(2013, 1, 13, 18, 20, 52)]
with ensure_clean(self.ext) as path:
write_frame = DataFrame.from_items([('A', datetimes)])
write_frame.to_excel(path, 'Sheet1')
read_frame = read_excel(path, 'Sheet1', header=0)
tm.assert_series_equal(write_frame['A'], read_frame['A'])
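# Note: a minimal standalone sketch of the same datetime round-trip against the
# current pandas API, where the positional sheet argument and `sheetname` have been
# replaced by the `sheet_name` keyword; the temporary path is illustrative only.
import os
import tempfile
from datetime import datetime

import pandas as pd

datetimes = [datetime(2013, 1, 13, 1, 2, 3), datetime(2013, 1, 13, 2, 45, 56)]
write_frame = pd.DataFrame({'A': datetimes})

with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'roundtrip.xlsx')
    write_frame.to_excel(path, sheet_name='Sheet1')                      # needs openpyxl
    read_frame = pd.read_excel(path, sheet_name='Sheet1', header=0, index_col=0)

pd.testing.assert_series_equal(write_frame['A'], read_frame['A'])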
def pingan_trust():
result = get_total(start_date=START_DATE,end_date='2016-05-10')
# issuers = pd.read_excel('????-SW??-????.xlsx',sheetname=[0], header = 0)[0]
# issuers = my_db.getCompanyList()
issuers = pd.read_excel('../peace/??????.xlsx',sheetname=[0], header = 0)[0]
issuers.columns= ['name']
focus = issuers.merge(result, on='name', how='left')
focus = focus.sort_values('??',axis=0,ascending=False)
import time
time_str = time.strftime('%Y%m%d',time.localtime(time.time()))
focus['rptDate']=time_str
insert_into_db(focus)
report = focus.dropna(axis=0, how='any',thresh=3)
report.to_excel("shixin_zhixing_bank.xlsx")
def getModle():
fixedEvaluation = pd.read_excel(MODLE_FILE_NAME,sheetname=[0], header = 0, skiprows = [0])
industryTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[1], index_col = 2,parse_cols="B:L",header = 3)
trendTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[2], header = 2,skiprows=[0])
fluctuationTbl = pd.read_excel(MODLE_FILE_NAME,sheetname=[3], header = 2,skiprows=[0])
fixedScoreTble = pd.read_excel(MODLE_FILE_NAME,sheetname=[4], header = 0,skiprows=[0])
df=pd.read_excel(DATA_FILE,sheetname=[1], header = 0,index_col=0,verbose=True)
df[1].head().index
df[1].head().columns
df[1].head().describe()
df[1].head().loc[:,['????','?????']]
for i in range(df[1].head().iloc[1].count()):
print(df[1].head().iloc[1][i])
head = df[1].head()
head.values[0][1:40].reshape(13,3)
def get_history_bar(field_names, start_date, end_date, **kwargs):
field_info = pd.read_excel(argInfoWB,sheetname='????',engine='xlrd')
if not isinstance(field_names,list):
field_names = [field_names]
# ?????????
_l = []
w.start()
for fieldName in field_names:
field_name = field_info[field_info['FactorName']==fieldName]['FieldName'].iat[0]
args = field_info[field_info['FactorName']==fieldName]['Args'].iat[0]
params = _parse_args(args,**kwargs)
all_days = data_api.tc.get_trade_days(start_date, end_date)
all_ids = data_api.get_history_ashare(all_days).index.levels[1].unique()
data = w.wsd(
list(map(tradecode_to_windcode, all_ids)), field_name, start_date, end_date, params)
_l.append(_bar_to_dataframe(data))
data = pd.concat(_l,axis=1)
w.close()
return data
def read_annotated_files(dirname):
messages = []
labels = np.zeros(0)
filenames = glob.glob(os.path.join(dirname, '*.xls*'))
for filename in filenames:
print('Reading %s' % filename, end='. ', flush=True)
df = pd.read_excel(filename)
print("Found %d new samples" % df[df.LABEL.notnull()].shape[0])
labels = np.hstack((labels, np.array(df[df.LABEL.notnull()].LABEL.tolist(),
dtype=int)))
messages += df[df.LABEL.notnull()].text.tolist()
return messages, labels
def setUp(self):
trans = {
'Linear': ['A'],
'Random': ['B'],
'Wave': ['C','D']}
system_name = 'Simple'
file_name = join(simpleexampledir,'simple.xlsx')
df = pd.read_excel(file_name)
self.pm = pecos.monitoring.PerformanceMonitoring()
self.pm.add_dataframe(df, system_name)
self.pm.add_translation_dictionary(trans, system_name)
self.pm.check_timestamp(900)
clock_time = self.pm.get_clock_time()
time_filter = (clock_time > 3*3600) & (clock_time < 21*3600)
self.pm.add_time_filter(time_filter)
def unet_cross_val(data_dir, out_dir, mapping, splits, unet_conf):
# Load spreadsheet
with pd.ExcelFile(mapping) as xls:
df = pd.read_excel(xls, 'Sheet1').set_index('index')
df['class'] = df['class'].map({'preplus': 'pre-plus', 'normal': 'normal', 'plus': 'plus'})
img_dir = join(data_dir, 'images')
seg_dir = join(data_dir, 'manual_segmentations')
mask_dir = join(data_dir, 'masks')
# Check whether all images exist
check_images_exist(df, img_dir, seg_dir, mask_dir)
# Now split into training and testing
CVFile = sio.loadmat(splits)
# # Combining Pre-Plus and Plus
# trainPlusIndex = CVFile['trainPlusIndex'][0]
# testPlusIndex = CVFile['testPlusIndex'][0]
#
# plus_dir = make_sub_dir(out_dir, 'trainTestPlus')
# print "Generating splits for combined No and Pre-Plus"
# generate_splits(trainPlusIndex, testPlusIndex, df, img_dir, mask_dir, seg_dir, plus_dir)
# Combining No and Pre-Plus
trainPrePIndex = CVFile['trainPrePIndex'][0]
testPrePIndex = CVFile['testPrePIndex'][0]
prep_dir = make_sub_dir(out_dir, 'trainTestPreP')
print "Generating splits for combined Pre-Plus and Plus"
generate_splits(trainPrePIndex, testPrePIndex, df, img_dir, mask_dir, seg_dir, prep_dir)
# Train models
train_and_test(prep_dir, unet_conf, processes=1)
# train_and_test(plus_dir, unet_conf, processes=2)
def pca_augmentation(data_h5, excel_path):
f = h5py.File(data_h5, 'r')
df1 = pd.read_excel(excel_path, sheetname=0, header=1)
df1 = df1.rename(columns=lambda x: x.strip()).set_index('Image') # strip whitespace
df2 = pd.read_excel(excel_path, sheetname=1, header=1)
df2 = df2.rename(columns=lambda x: x.strip()).set_index('Image') # strip whitespace
df = pd.concat([df1, df2])
X = preprocess_data(f)
X_mean = np.mean(X, axis=0)
X = X - X_mean
# PCA
pca = PCA().fit(X)
def setDataFrameFromFile(self, filepath, **kwargs):
"""
Sets the model's dataFrame by reading a file.
Accepted file formats:
- .xlsx (sheet1 is read unless specified in kwargs)
- .csv (comma separated unless specified in kwargs)
- .txt (any separator)
:param filepath: (str)
The path to the file to be read.
:param kwargs:
pandas.read_csv(**kwargs) or pandas.read_excel(**kwargs)
:return: None
"""
df = superReadFile(filepath, **kwargs)
self.setDataFrame(df, filePath=filepath)
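# Note: `superReadFile` itself is not part of this excerpt; the sketch below is a
# guess at the kind of extension-based dispatch the docstring describes (function
# name and fallbacks are assumptions, not the project's actual helper).
import os
import pandas as pd

def super_read_file(filepath, **kwargs):
    """Pick a pandas reader based on the file extension and return a DataFrame."""
    ext = os.path.splitext(filepath)[1].lower()
    if ext in ('.xlsx', '.xls'):
        # First sheet by default, unless sheet_name is passed through kwargs.
        return pd.read_excel(filepath, **kwargs)
    if ext == '.csv':
        return pd.read_csv(filepath, **kwargs)
    if ext == '.txt':
        # sep=None makes pandas sniff the separator (requires the python engine).
        kwargs.setdefault('sep', None)
        return pd.read_csv(filepath, engine='python', **kwargs)
    raise ValueError('Unsupported file type: %s' % ext)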
def get_dataset_rows(meta):
"""
if filename is a xlsx file, returns a list of list of dicts
else: returns a list of dicts
relies on get_dataset_local_filename() and meta['slug']
right now wishing I had made this all OOP...
"""
srcpath = meta['local_filepath']
if meta['filetype'] == 'workbook':
import pandas as pd
sheetindices = list(range(len(meta['gsheet']['sheets'])))
dfs = pd.read_excel(str(srcpath), sheetname=sheetindices)
return [x.to_dict('records') for x in dfs.values()]
else:
# assume CSV
return list(DictReader(srcpath.open('r')))
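# Note: hypothetical usage only; the meta dict mirrors the keys this function reads
# (`local_filepath`, `filetype`, `gsheet` -> `sheets`), and the file name is made up.
from pathlib import Path

meta = {
    'local_filepath': Path('data/survey.xlsx'),
    'filetype': 'workbook',
    'gsheet': {'sheets': [{'title': 'Sheet1'}, {'title': 'Sheet2'}]},
}
rows_per_sheet = get_dataset_rows(meta)   # workbook: one list of row-dicts per sheet
first_sheet_rows = rows_per_sheet[0]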
def initGraph(self, name):
#file reading using pandas
df = pd.read_excel(name, sheetname='Sheet1')
dim = df['Dimension']
x = df['X']
y = df['Y']
#print x,y
self.numCities = len(dim)
#set and fill the adjMatrix
self.adjMatrix = [[1.0 for i in range(self.numCities)] for j in range(self.numCities) ]
for i in range(self.numCities):
for j in range(self.numCities):
#fill the adjmatrix with city coordinates and calculate euclidean distances
self.adjMatrix[i][j] = self.calEdge(x[i], x[j], y[i], y[j])
#calculating edge weights using euclidean distances
def extract_lstm_test(dictionary, file_name, tag_num=CLASS_NUM, col_tag=0, col_content=1, length=MAX_LENGTH):
contents = pd.read_excel(file_name, header=None)
cw = lambda x: [word.encode('utf-8') for word in jieba.cut(x) if word not in stopwords and word.strip() != '' and word.encode('utf-8') in dictionary.index]
contents['words'] = contents[col_content].apply(cw)
get_sent = lambda x: list(dictionary['id'][x])
contents['sent'] = contents['words'].apply(get_sent) # ????,??????????
print("Pad sequences (samples x time)")
contents['sent'] = list(sequence.pad_sequences(contents['sent'], maxlen=length))
x = np.array(list(contents['sent'])) # ???
y = np.zeros((len(list(contents[col_tag])), tag_num))
for i in range(len(list(contents[col_tag]))):
for j in range(tag_num):
if contents[col_tag][i] == j:
y[i][j] = 1
return x, y
# dictionary-model feature extraction
def extract_dictionary_feature(file_name, col_tag=0, col_content=1):
# load sentiment lexicons (adverbs, negation words, positive/negative dictionaries)
adv = codecs.open('./data/vocabulary/adv.txt', 'rb', encoding='utf-8').read().split('\n')
inverse = codecs.open('./data/vocabulary/inverse.txt', 'rb', encoding='utf-8').read().split('\n')
negdict = codecs.open('./data/vocabulary/negdict.txt', 'rb', encoding='utf-8').read().split('\n')
posdict = codecs.open('./data/vocabulary/posdict.txt', 'rb', encoding='utf-8').read().split('\n')
contents = pd.read_excel(file_name, header=None)
print('cut words...')
cw = lambda x: [pair for pair in psg.lcut(x) if pair.word not in stopwords]
contents['pairs'] = contents[col_content].apply(cw)
matrix = reviews2matrix(list(contents['pairs']), posdict, negdict, inverse, adv)
x = matrix2vec(matrix)
y = list(contents[col_tag])
return x, y
def read():
df = pd.read_excel("jjs1.xlsx")
data = list(df.iloc[:, 7])
prices=[]
for item in data:
if not str(item)=="nan":
prices.append(int(item))
print(prices[0:100])
matplotlib.style.use('ggplot')  # use the ggplot plotting style
ts = pd.Series(prices[0:100], index=pd.date_range('1/1/2000', periods=100))
plt.figure()
df.plot.hist(alpha=0.5)
plt.legend()
plt.show()
pass
def loadfile():
neg=pd.read_excel(datadir + '/neg.xls',header=None,index=None)
pos=pd.read_excel(datadir + '/pos.xls',header=None,index=None)
cw = lambda x: list(jieba.cut(x))
pos['words'] = pos[0].apply(cw)
neg['words'] = neg[0].apply(cw)
#print pos['words']
#use 1 for positive sentiment, 0 for negative
y = np.concatenate((np.ones(len(pos)), np.zeros(len(neg))))
x_train, x_test, y_train, y_test = train_test_split(np.concatenate((pos['words'], neg['words'])), y, test_size=0.2)
np.save(modeldir + '/y_train.npy',y_train)
np.save(modeldir + '/y_test.npy',y_test)
return x_train,x_test
#??????????????
def get_hs300s():
"""
Get the current constituent stocks of the CSI 300 index and their weights.
Return
--------
DataFrame
code  : stock code
name  : stock name
date  : date
weight: constituent weight
"""
from tushare.stock.fundamental import get_stock_basics
try:
wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
wt.columns = ct.FOR_CLASSIFY_W_COLS
wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
df = get_stock_basics()[['name']]
df = df.reset_index()
return pd.merge(df,wt)
except Exception as er:
print(str(er))
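# Note: hypothetical usage; get_hs300s returns None (after printing the error) when
# the download or parse fails, so guard before using the result.
hs300 = get_hs300s()
if hs300 is not None:
    print(hs300.head())   # expected columns include code, name, date and weight
# get_sz50s() and get_zz500s() below follow the same pattern.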
def get_sz50s():
"""
Get the constituent stocks of the SSE 50 index.
Return
--------
DataFrame
code : stock code
name : stock name
"""
try:
df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
ct.PAGES['sz50b']), parse_cols=[0,1])
df.columns = ct.FOR_CLASSIFY_B_COLS
df['code'] = df['code'].map(lambda x :str(x).zfill(6))
return df
except Exception as er:
print(str(er))
def get_zz500s():
"""
Get the constituent stocks of the CSI 500 index.
Return
--------
DataFrame
code : stock code
name : stock name
"""
from tushare.stock.fundamental import get_stock_basics
try:
# df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
# ct.PAGES['zz500b']), parse_cols=[0,1])
# df.columns = ct.FOR_CLASSIFY_B_COLS
# df['code'] = df['code'].map(lambda x :str(x).zfill(6))
wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
wt.columns = ct.FOR_CLASSIFY_W_COLS
wt['code'] = wt['code'].map(lambda x :str(x).zfill(6))
df = get_stock_basics()[['name']]
df = df.reset_index()
return pd.merge(df,wt)
except Exception as er:
print(str(er))
def load(self, pth):
with open(pth, 'rb') as fh:  # Excel files must be opened in binary mode
data = read_excel(fh, sheetname='data')
print(data.as_matrix(columns=data.columns[1:]))
return data.as_matrix()
#todo: pandas formats - http://pandas.pydata.org/pandas-docs/stable/io.html
# hdf5
# sql
#todo: hdf5 - http://stackoverflow.com/a/9619713/723090
#todo: bloscpack http://stackoverflow.com/a/22225337/723090
#todo: pytables
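# Note: one possible shape for the HDF5 todo above, using pandas' built-in
# to_hdf/read_hdf (requires the PyTables package); path and key are placeholders.
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [0.1, 0.2, 0.3]})
df.to_hdf('store.h5', key='data', mode='w')   # write (or overwrite) the store
restored = pd.read_hdf('store.h5', 'data')    # read the same key back
assert restored.equals(df)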