def getExcelData(self):
    """
    Get data from 'hsi_futures.xlsx'.
    Columns: Date | Open | High | Low | Close | SMAVG5 | SMAVG10 | SMAVG15 | Volume | VolumeSMAVG5
    :return: data table
    """
    df = pd.DataFrame()
    xl = pd.ExcelFile("../dataManager/hsi_futures.xlsx")
    # print xl.sheet_names
    sheets = xl.sheet_names
    for sheet in sheets:
        df = df.append(xl.parse(sheet))
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values("Date", ascending=True, inplace=True)
    data = df.reset_index(drop=True)
    return data
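For comparison, a minimal standalone sketch of the same multi-sheet concatenation (the path and 'Date' column are taken from the method above; pd.concat replaces the append loop and is the spelling modern pandas requires):

import pandas as pd

def load_all_sheets(path):
    # Concatenate every sheet of the workbook into one frame,
    # sorted by date with a fresh integer index.
    xl = pd.ExcelFile(path)
    df = pd.concat(xl.parse(sheet) for sheet in xl.sheet_names)
    df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values('Date').reset_index(drop=True)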
def get_hs300s():
    """
    Get the current CSI 300 (HS300) constituents and their index weights.
    Return
    --------
    DataFrame
        code  : stock code
        name  : stock name
        date  : date
        weight: index weight
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                      ct.PAGES['hs300w']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
def get_sz50s():
    """
    Get the SSE 50 (SZ50) index constituents.
    Return
    --------
    DataFrame
        code: stock code
        name: stock name
    """
    try:
        df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                      ct.PAGES['sz50b']), parse_cols=[0, 1])
        df.columns = ct.FOR_CLASSIFY_B_COLS
        df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        return df
    except Exception as er:
        print(str(er))
def get_zz500s():
    """
    Get the CSI 500 (ZZ500) index constituents.
    Return
    --------
    DataFrame
        code: stock code
        name: stock name
    """
    from tushare.stock.fundamental import get_stock_basics
    try:
        # df = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
        #                    ct.PAGES['zz500b']), parse_cols=[0,1])
        # df.columns = ct.FOR_CLASSIFY_B_COLS
        # df['code'] = df['code'].map(lambda x: str(x).zfill(6))
        wt = pd.read_excel(ct.HS300_CLASSIFY_URL_FTP%(ct.P_TYPE['ftp'], ct.DOMAINS['idxip'],
                                                      ct.PAGES['zz500wt']), parse_cols=[0, 3, 6])
        wt.columns = ct.FOR_CLASSIFY_W_COLS
        wt['code'] = wt['code'].map(lambda x: str(x).zfill(6))
        df = get_stock_basics()[['name']]
        df = df.reset_index()
        return pd.merge(df, wt)
    except Exception as er:
        print(str(er))
def search():
    # Tally transfers in and out of the account (the Chinese column
    # headers and labels from huatai2.xls were garbled to '?' in the
    # source and cannot be recovered here).
    df = pd.read_excel("huatai2.xls")
    input_m = 0.0
    output_m = 0.0
    for index, row in df.iterrows():
        if row[u'??'] == u'??':
            each_input = row[u'?????']
            print u"??",
            print each_input
            input_m = input_m + each_input
            #print type(money)
        if row[u'??'] == u'??':
            each_output = row[u'?????']
            print u"??",
            print each_output
            #print type(money)
            output_m = output_m + each_output
    print "Summary is %f" % (input_m - output_m)
def replace_test():
    # Demo of Series/DataFrame replace (the garbled u'????' literals
    # below were Chinese cell values in the source).
    df = pd.read_excel("huatai2.xls")
    s1 = pd.Series(['a', 'b', 'c', 'd', 'e'])
    #print s1
    s2 = pd.Series(['1', '2', '3', '4', '5'])
    #print s2
    s3 = s1.replace(1, 'k')
    #print s1
    #print s3
    print df
    df.replace(['20160722', u'????', 2431.0, u'????', 13.00, 300.0, 3891.10, 3905.71, u'??'],
               ['20160722', '0', '0', '0', 0, 0, 0, 0, '0'], inplace=True)
    #df.replace(['20160722'],['20160725','0','0','0',0,0,0,0,'0'],inplace=True)
    print df
def load_articles(self):
    """
    Loads the DataFrame with all the articles.
    Return: DataFrame with the title, content, tags and author of all articles
    """
    #parser = SafeConfigParser()
    #parser.read('Config.ini')
    #file_path = settings['IP_FILE_PATH']
    #file_name = settings['IP_FILE_NAME']
    #logging.debug("Directory Name : {0} and File name is {1} \n".format(file_path, file_name))
    path = os.path.join(self.ip_file_path, self.ip_file_name)
    #self.df = pd.read_csv('TrainSet700_User_Ratings.xlsx', encoding='utf-8')  # Load articles in a DataFrame
    self.df = pd.read_excel(path, na_values=['NA'], parse_cols="A,B,C")
    #self.df = self.df[['Sno', 'title', 'content_text']]  # Slice to remove redundant columns
    print(self.df)
    logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df), self.df.shape))
def load_articles(self):
    """
    Loads the DataFrame with all the articles.
    Return: DataFrame with the title, content, tags and author of all articles
    """
    file_path = '/Users/shwetanknagar/Downloads/Personal/Project Eventstreet/Boconni Project'
    file_name = os.path.basename("TestSet300_User_Ratings.xlsx")
    path = os.path.join(file_path, file_name)
    #self.df = pd.read_csv('TrainSet700_User_Ratings.xlsx', encoding='utf-8')  # Load articles in a DataFrame
    self.df = pd.read_excel(path, na_values=['NA'], parse_cols="A,B,C")
    #self.df = self.df[['Sno', 'title', 'content_text']]  # Slice to remove redundant columns
    print(self.df)
    logging.debug("Number of articles: {0} and no of columns are {1} \n".format(len(self.df), self.df.shape))
def test_load_xlsx(self):
    url = 'http://test.com/the.xlsx'
    self.url_pval.set_value(url)
    self.url_pval.save()
    xlsx_bytes = open(mock_xslx_path, "rb").read()
    xlsx_table = pd.read_excel(mock_xslx_path)
    # success case
    with requests_mock.Mocker() as m:
        m.get(url, content=xlsx_bytes,
              headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
        self.press_fetch_button()
        response = self.get_render()
        self.assertEqual(response.content, make_render_json(xlsx_table))
    # malformed file should put module in error state
    with requests_mock.Mocker() as m:
        m.get(url, content=b"there's just no way this is xlsx",
              headers={'content-type': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'})
        self.press_fetch_button()
        self.wfmodule.refresh_from_db()
        self.assertEqual(self.wfmodule.status, WfModule.ERROR)
# Source: price_data_London.py — project: smart-battery-for-smart-energy-usage, author: AnatolyPavlov
def main():
    df = pd.read_excel('../data/Tariffs.xlsx')
    df.loc[df['Tariff'] == 'Low', 'Tariff'] = 0.0399
    df.loc[df['Tariff'] == 'Normal', 'Tariff'] = 0.1176
    df.loc[df['Tariff'] == 'High', 'Tariff'] = 0.6720
    #
    ets = ExtractTimeSeries(datetime_col='TariffDateTime', yt_col='Tariff')
    df = ets.transform(df)
    #
    day = pd.to_datetime('2013-12-27').date()
    next_day = day + timedelta(days=1)
    df_out = df.query('index >= @day and index < @next_day')
    df_out.columns = ['Tariff (UK Pounds)']
    #
    print_process('Saving Post-Processed Data')
    path_to_price = '../clean_data/price_data_London.csv'
    df_out.to_csv(path_to_price)
    print 'Tariff data saved into: {}'.format(path_to_price)
    print
def _load_table(self, filepath):
    """
    Load table from file system.
    :param str filepath: Path to table in CSV, TSV, XLSX or
        Pandas pickle format.
    :return: Pandas table
    :rtype: pandas.core.frame.DataFrame
    """
    _, ext = os.path.splitext(filepath.lower())
    if ext == '.tsv':
        return pd.read_table(filepath, **self.kwargs)
    if ext == '.csv':
        return pd.read_csv(filepath, **self.kwargs)
    if ext == '.xlsx':
        return pd.read_excel(filepath, **self.kwargs)
    return pd.read_pickle(filepath, **self.kwargs)
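The same extension dispatch can also be written as a lookup table; a hedged standalone sketch (kwargs stands in for self.kwargs above):

import os
import pandas as pd

def load_table(filepath, **kwargs):
    # Map extensions to pandas readers; anything unrecognized is
    # assumed to be a pickled DataFrame, matching the method above.
    readers = {'.tsv': pd.read_table, '.csv': pd.read_csv,
               '.xlsx': pd.read_excel}
    _, ext = os.path.splitext(filepath.lower())
    return readers.get(ext, pd.read_pickle)(filepath, **kwargs)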
# Source: normalizer.py — project: the-magical-csv-merge-machine, author: entrepreneur-interet-general
def read_excel(self, file):
    # TODO: add iterator and return columns
    excel_tab = pd.read_excel(file, dtype=str)
    columns = excel_tab.columns

    def make_gen(excel_tab, chunksize):
        cursor = 0
        chunk = excel_tab.iloc[:chunksize]
        while chunk.shape[0]:
            yield chunk
            cursor += chunksize
            chunk = excel_tab.iloc[cursor:cursor + chunksize]

    tab = make_gen(excel_tab, self.CHUNKSIZE)
    tab = (self._clean_header(tab_part) for tab_part in tab)
    return tab, None, None, self._clean_column_names(columns)
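pd.read_excel has no chunksize parameter (unlike pd.read_csv), which is why the method loads the whole sheet and slices it afterwards. A minimal standalone sketch of that pattern:

import pandas as pd

def iter_excel_chunks(path, chunksize=1000):
    # Load once, then yield fixed-size row slices until exhausted.
    tab = pd.read_excel(path, dtype=str)
    for start in range(0, len(tab), chunksize):
        yield tab.iloc[start:start + chunksize]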
def GetAllTodayData(self):
    # Fetch today's quotes for all stocks and cache them to an Excel
    # file; reuse the cached copy on later runs (the original Chinese
    # comments were garbled in the source).
    filename = self.today + '_all_.xls'
    # Store under the data directory
    filename = os.path.join(self.path, filename)
    if not os.path.exists(filename):
        self.df_today_all = ts.get_today_all()
        # Drop rows with a zero turnover ratio
        self.df_today_all.drop(self.df_today_all[self.df_today_all['turnoverratio'] == 0].index, inplace=True)
        # Alternative way to drop the same rows:
        #n1=self.df_today_all[self.df_today_all['turnoverratio']==0]
        #n2=self.df_today_all.drop(n1.index)
        #print n2
        print self.df_today_all
        self.df_today_all.to_excel(filename, sheet_name='All')
    else:
        self.df_today_all = pd.read_excel(filename, sheet_name='All')
        print "File existed"
def count_up_down(filename):
    # Count stocks in 1%-wide changepercent buckets from -10 to +10
    # and plot the distribution as a bar chart.
    total = []
    df = pd.read_excel(filename)
    count = len(df[(df['changepercent'] >= -10.2) & (df['changepercent'] < -9)])
    total.append(count)
    for i in range(-9, 9, 1):
        count = len(df[(df['changepercent'] >= i * 1.00) & (df['changepercent'] < (i + 1) * 1.00)])
        total.append(count)
    count = len(df[(df['changepercent'] >= 9)])
    total.append(count)
    print total
    df_figure = pd.Series(total, index=range(-10, 10))
    print df_figure
    fg = df_figure.plot(kind='bar', table=True)
    plt.show(fg)
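The bucket counting above can also be expressed with pd.cut; a hedged sketch assuming the same 'changepercent' column:

import pandas as pd

def bucket_counts(df):
    # Bin change percentages into 1%-wide buckets from -10 to +10.
    bins = list(range(-10, 11))
    return pd.cut(df['changepercent'], bins=bins).value_counts().sort_index()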
def read_res(file):
    # Read model results from the xls workbook.
    Size, R, xr, xl, xc, yu, yd, yc = [], [], [], [], [], [], [], []  # areas, Fij's, department coordinates
    out = pd.read_excel(file, sheetname="Out")       # model results
    Rout = pd.read_excel(file, sheetname="R")        # Fij's
    Sizeout = pd.read_excel(file, sheetname="Size")  # departments' target sizes
    Wout = pd.read_excel(file, sheetname="W")        # w1 and w2
    w1 = float(Wout['w1'][0])
    w2 = 1.0 - w1
    totx = float(out['totx'][0])  # total length on the x axis
    toty = float(out['toty'][0])  # total length on the y axis
    for d in range(len(Sizeout)):  # copy the results into Python lists
        R.append([])
        Size.append(float(Sizeout['Area'][d]))
        xr.append(float(out['Xr'][d]))
        xl.append(float(out['Xl'][d]))
        xc.append((float(out['Xl'][d]) + float(out['Xr'][d])) / 2)
        yu.append(float(out['Yu'][d]))
        yd.append(float(out['Yd'][d]))
        yc.append((float(out['Yu'][d]) + float(out['Yd'][d])) / 2)
        for i in range(len(Rout)):
            R[d].append(float(Rout.iloc[d, i]))
    return Size, R, totx, toty, xr, xl, xc, yu, yd, yc, w1, w2
def groups(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='Groups')
    entries = []
    for i, row in df.iterrows():
        entry = {
            "Q_k": row['Qk'],
            "R_k": row['Rk'],
            "maingroup_name": row["main group name"],
            "mgi": row['main group index'],
            "sgi": row['sub group index'],
            "subgroup_name": row["sub group name"]
        }
        entries.append(entry)
    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent=2, sort_keys=True)
def interaction_parameters(ofname):
    df = pandas.read_excel('GC-VTPR.xlsx', sheetname='InteractionParameters')
    df = df.fillna(0.0)
    entries = []
    for i, row in df.iterrows():
        entry = {
            "a_ij": row['aij / K'],
            "a_ji": row['aji / K'],
            "b_ij": row['bij'],
            "b_ji": row['bji'],
            "c_ij": row['cij / K-1'],
            "c_ji": row['cji / K-1'],
            "mgi1": row['i'],
            "mgi2": row['j']
        }
        entries.append(entry)
    with open(ofname, 'w') as fp:
        json.dump(entries, fp, indent=2, sort_keys=True)
def _load_powerplant():
    """
    Attribute information: features consist of hourly average ambient
    variables,
    - temperature (T) in the range 1.81 C to 37.11 C,
    - ambient pressure (AP) in the range 992.89-1033.30 millibar,
    - relative humidity (RH) in the range 25.56% to 100.16%,
    - exhaust vacuum (V) in the range 25.36-81.56 cm Hg,
    - net hourly electrical energy output (EP) 420.26-495.76 MW.
    The averages are taken from various sensors located around the
    plant that record the ambient variables every second.
    The variables are given without normalization.
    """
    data_file = os.path.join(data_dir, 'power-plant/Folds5x2_pp.xlsx')
    data = pd.read_excel(data_file)
    x = data.values[:, :-1]
    y = data.values[:, -1]
    return x, y
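A hedged usage sketch, assuming data_dir points at a local copy of the UCI Combined Cycle Power Plant download (first sheet: four feature columns, target last):

import os
import pandas as pd

data_dir = '.'  # hypothetical; wherever Folds5x2_pp.xlsx lives

data = pd.read_excel(os.path.join(data_dir, 'power-plant/Folds5x2_pp.xlsx'))
x, y = data.values[:, :-1], data.values[:, -1]
# For the UCI download, x.shape is (9568, 4) and y.shape is (9568,).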
def _fetch_data(self, dataset, query=None):
    files = [(y, m) for y in query['years'] for m in query['months']]
    frames = []
    # Download and clean every monthly Excel file
    for year, month in files:
        url = self.BASE_URL.format(year=year, month=MONTHS[month])
        frame = self._clean_data(pd.read_excel(url), year, month)
        frames.append(frame)
    # Yield individual rows of type Result from the dataframe
    raw_data = pd.concat(frames)
    for i, row in raw_data.iterrows():
        val = row.pop('value')
        yield Result(val, json.loads(row.to_json()))
# Source: test_excel.py — project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda, author: SignalMedia (this and the following snippets)
def get_exceldf(self, basename, *args, **kwds):
    """
    Return test data DataFrame. Test data path is defined by
    pandas.util.testing.get_data_path().

    Parameters
    ----------
    basename : str
        File base name, excluding file extension.

    Returns
    -------
    df : DataFrame
    """
    pth = os.path.join(self.dirpath, basename + self.ext)
    return read_excel(pth, *args, **kwds)
def test_read_one_empty_col_no_header(self):
    df = pd.DataFrame(
        [["", 1, 100],
         ["", 2, 200],
         ["", 3, 300],
         ["", 4, 400]]
    )
    with ensure_clean(self.ext) as path:
        df.to_excel(path, 'no_header', index=False, header=False)
        actual_header_none = read_excel(
            path,
            'no_header',
            parse_cols=[0],
            header=None
        )
        actual_header_zero = read_excel(
            path,
            'no_header',
            parse_cols=[0],
            header=0
        )
    expected = DataFrame()
    tm.assert_frame_equal(actual_header_none, expected)
    tm.assert_frame_equal(actual_header_zero, expected)
def test_read_from_file_url(self):
    # FILE
    if sys.version_info[:2] < (2, 6):
        raise nose.SkipTest("file:// not supported with Python < 2.6")
    localtable = os.path.join(self.dirpath, 'test1' + self.ext)
    local_table = read_excel(localtable)
    try:
        url_table = read_excel('file://localhost/' + localtable)
    except URLError:
        # fails on some systems
        import platform
        raise nose.SkipTest("failing on %s" %
                            ' '.join(platform.uname()).strip())
    tm.assert_frame_equal(url_table, local_table)
def test_read_excel_skiprows_list(self):
    # GH 4903
    actual = pd.read_excel(os.path.join(self.dirpath,
                                        'testskiprows' + self.ext),
                           'skiprows_list', skiprows=[0, 2])
    expected = DataFrame([[1, 2.5, pd.Timestamp('2015-01-01'), True],
                          [2, 3.5, pd.Timestamp('2015-01-02'), False],
                          [3, 4.5, pd.Timestamp('2015-01-03'), False],
                          [4, 5.5, pd.Timestamp('2015-01-04'), True]],
                         columns=['a', 'b', 'c', 'd'])
    tm.assert_frame_equal(actual, expected)

    actual = pd.read_excel(os.path.join(self.dirpath,
                                        'testskiprows' + self.ext),
                           'skiprows_list', skiprows=np.array([0, 2]))
    tm.assert_frame_equal(actual, expected)
def test_read_excel_squeeze(self):
    # GH 12157
    f = os.path.join(self.dirpath, 'test_squeeze' + self.ext)

    actual = pd.read_excel(f, 'two_columns', index_col=0, squeeze=True)
    expected = pd.Series([2, 3, 4], [4, 5, 6], name='b')
    expected.index.name = 'a'
    tm.assert_series_equal(actual, expected)

    actual = pd.read_excel(f, 'two_columns', squeeze=True)
    expected = pd.DataFrame({'a': [4, 5, 6],
                             'b': [2, 3, 4]})
    tm.assert_frame_equal(actual, expected)

    actual = pd.read_excel(f, 'one_column', squeeze=True)
    expected = pd.Series([1, 2, 3], name='a')
    tm.assert_series_equal(actual, expected)
def test_int_types(self):
    _skip_if_no_xlrd()
    for np_type in (np.int8, np.int16, np.int32, np.int64):
        with ensure_clean(self.ext) as path:
            # Test np.int values read come back as int (rather than float
            # which is Excel's format).
            frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                              dtype=np_type)
            frame.to_excel(path, 'test1')
            reader = ExcelFile(path)
            recons = read_excel(reader, 'test1')
            int_frame = frame.astype(np.int64)
            tm.assert_frame_equal(int_frame, recons)
            recons2 = read_excel(path, 'test1')
            tm.assert_frame_equal(int_frame, recons2)
            # Test that convert_float=False comes back as float.
            float_frame = frame.astype(float)
            recons = read_excel(path, 'test1', convert_float=False)
            tm.assert_frame_equal(recons, float_frame,
                                  check_index_type=False,
                                  check_column_type=False)
def test_sheets(self):
    _skip_if_no_xlrd()
    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan
        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', columns=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)
        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()
        reader = ExcelFile(path)
        recons = read_excel(reader, 'test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = read_excel(reader, 'test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)
        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])
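Modern pandas spells the separate-sheets write with a context manager instead of an explicit writer.save(); a minimal sketch with illustrative frames:

import pandas as pd

df1 = pd.DataFrame({'a': [1, 2]})
df2 = pd.DataFrame({'b': [3, 4]})
with pd.ExcelWriter('two_sheets.xlsx') as writer:
    df1.to_excel(writer, sheet_name='test1')
    df2.to_excel(writer, sheet_name='test2')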
def test_colaliases(self):
    _skip_if_no_xlrd()
    with ensure_clean(self.ext) as path:
        self.frame['A'][:5] = nan
        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', columns=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)
        # column aliases
        col_aliases = Index(['AA', 'X', 'Y', 'Z'])
        self.frame2.to_excel(path, 'test1', header=col_aliases)
        reader = ExcelFile(path)
        rs = read_excel(reader, 'test1', index_col=0)
        xp = self.frame2.copy()
        xp.columns = col_aliases
        tm.assert_frame_equal(xp, rs)
def test_to_excel_multiindex(self):
    _skip_if_no_xlrd()
    frame = self.frame
    arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
    new_index = MultiIndex.from_arrays(arrays,
                                       names=['first', 'second'])
    frame.index = new_index
    with ensure_clean(self.ext) as path:
        frame.to_excel(path, 'test1', header=False)
        frame.to_excel(path, 'test1', columns=['A', 'B'])
        # round trip
        frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        df = read_excel(reader, 'test1', index_col=[0, 1],
                        parse_dates=False)
        tm.assert_frame_equal(frame, df)
    # Test for Issue 11328. If column indices are integers, make
    # sure they are handled correctly for either setting of
    # merge_cells
def test_to_excel_multiindex_dates(self):
    _skip_if_no_xlrd()
    # try multiindex with dates
    tsframe = self.tsframe.copy()
    new_index = [tsframe.index, np.arange(len(tsframe.index))]
    tsframe.index = MultiIndex.from_arrays(new_index)
    with ensure_clean(self.ext) as path:
        tsframe.index.names = ['time', 'foo']
        tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
        reader = ExcelFile(path)
        recons = read_excel(reader, 'test1',
                            index_col=[0, 1])
        tm.assert_frame_equal(tsframe, recons)
        self.assertEqual(recons.index.names, ('time', 'foo'))
def test_to_excel_multiindex_no_write_index(self):
    _skip_if_no_xlrd()
    # Test writing and re-reading a MI without the index. GH 5616.
    # Initial non-MI frame.
    frame1 = DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})
    # Add a MI.
    frame2 = frame1.copy()
    multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
    frame2.index = multi_index
    with ensure_clean(self.ext) as path:
        # Write out to Excel without the index.
        frame2.to_excel(path, 'test1', index=False)
        # Read it back in.
        reader = ExcelFile(path)
        frame3 = read_excel(reader, 'test1')
        # Test that it is the same as the initial frame.
        tm.assert_frame_equal(frame1, frame3)