def load_file(file_name, sheet_name_list, cut_head=2):
    """
    Load selected sheets of an xlsx file into a dictionary indexed by sheet name.

    Each sheet is parsed with pandas' default settings (so pandas takes the
    first row of the sheet as the column header), the first ``cut_head`` rows
    of the parsed data are dropped, and what remains is converted to float.
    Row and column labels are then replaced by 0, 1, 2, ... and missing cells
    are filled with 0.

    :param string file_name: name of file
    :param [string] sheet_name_list: name of selected sheets in the xlsx file
    :param int cut_head: number of leading rows dropped from every parsed
        sheet (default 2, the value that used to be hard-coded)
    :return: {string:DataFrame} raw_data: {name of sheet: pure data retrieved
        from xlsx with column and index 0,1,2,...}
    :raises ValueError: propagated from ``astype`` when a remaining cell holds
        text that cannot be converted to float
    """
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('loading file...')
    raw_data = {}
    # The context manager closes the workbook handle even if a sheet fails to
    # parse or convert.
    with pd.ExcelFile(file_name) as workbook:
        # iterate over every requested sheet and retrieve useful data
        for sheet_name in sheet_name_list:
            print('parsing sheet %s' % (sheet_name,))
            # parse the sheet into a DataFrame and cut off the leading rows
            sheet = workbook.parse(sheet_name).iloc[cut_head:, :]
            # The parsed columns are dtype=object because the dropped rows
            # contained strings; convert to float.  The builtin ``float`` is
            # used because the ``np.float`` alias no longer exists in NumPy.
            sheet = sheet.astype(float)
            # reset column and index labels to 0, 1, 2, ...
            sheet.columns = range(sheet.shape[1])
            sheet.index = range(sheet.shape[0])
            # empty cells were read as NaN; store them as 0
            raw_data[sheet_name] = sheet.fillna(0)
    return raw_data
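# 评论列表 (comment list) -- leftover web-page navigation text, not part of the code
# 文章目录 (article table of contents) -- leftover web-page navigation text, not part of the code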