def render(wf_module, table):
    """Load the module's fetched file into a DataFrame.

    The file type is chosen from the file name's extension, compared
    case-insensitively: ``.xls``/``.xlsx`` are read with
    ``pd.read_excel`` and ``.csv`` with ``pd.read_csv``.

    Args:
        wf_module: Object providing ``retrieve_fetched_file()``,
            ``set_ready()`` and ``set_error()``.
        table: Unused by this function.

    Returns:
        The parsed DataFrame, or ``None`` when there is no fetched file,
        the file cannot be parsed, or the extension is not recognised.
        In the two failure cases the message is reported through
        ``wf_module.set_error``.
    """
    fetched = wf_module.retrieve_fetched_file()
    if fetched is None:
        # Nothing has been fetched: mark the module ready with no table.
        wf_module.set_ready(notify=True)
        return None

    # Lower-case once so mixed-case names such as ``Report.Xlsx`` or
    # ``data.Csv`` are recognised, not just all-lower / all-upper suffixes.
    filename = fetched.name.lower()
    if filename.endswith(('.xls', '.xlsx')):
        try:
            table_aux = pd.read_excel(fetched)
        except XLRDError as e:
            wf_module.set_error(str(e))
            return None
    elif filename.endswith('.csv'):
        try:
            table_aux = pd.read_csv(fetched)
        except ParserError as e:
            wf_module.set_error(str(e))
            return None
    else:
        wf_module.set_error('Unknown file type.', notify=True)
        return None

    # The return value is ignored, so sanitize_dataframe presumably
    # cleans the frame in place -- TODO confirm against its definition.
    sanitize_dataframe(table_aux)
    wf_module.set_ready(notify=True)
    return table_aux
python类XLRDError()的实例源码
test_excel.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 24
收藏 0
点赞 0
评论 0
def test_excel_table_sheet_by_index(self):
    """Sheets addressed by integer index load the same data as the CSV reference.

    Exercises both ``read_excel`` and ``ExcelFile.parse``, with and without
    a skipped footer row, and checks that an unknown sheet name raises
    ``xlrd.XLRDError``.
    """
    workbook = self.get_excelfile('test1')
    expected = self.get_csv_refdf('test1')

    # Whole sheets through the module-level reader.
    first_sheet = read_excel(workbook, 0, index_col=0)
    second_sheet = read_excel(workbook, 1, skiprows=[1], index_col=0)
    tm.assert_frame_equal(first_sheet, expected, check_names=False)
    tm.assert_frame_equal(second_sheet, expected, check_names=False)

    # Whole sheets through the ExcelFile method.
    first_sheet = workbook.parse(0, index_col=0)
    second_sheet = workbook.parse(1, skiprows=[1], index_col=0)
    tm.assert_frame_equal(first_sheet, expected, check_names=False)
    tm.assert_frame_equal(second_sheet, expected, check_names=False)

    # Both spellings of the footer keyword must drop the last row.
    trimmed = read_excel(workbook, 0, index_col=0, skipfooter=1)
    trimmed_alias = read_excel(workbook, 0, index_col=0, skip_footer=1)
    tm.assert_frame_equal(trimmed, first_sheet.ix[:-1])
    tm.assert_frame_equal(trimmed, trimmed_alias)

    trimmed = workbook.parse(0, index_col=0, skipfooter=1)
    trimmed_alias = workbook.parse(0, index_col=0, skip_footer=1)
    tm.assert_frame_equal(trimmed, first_sheet.ix[:-1])
    tm.assert_frame_equal(trimmed, trimmed_alias)

    # A sheet name that does not exist surfaces xlrd's own error.
    import xlrd
    with tm.assertRaises(xlrd.XLRDError):
        read_excel(workbook, 'asdf')
test_excel.py 文件源码
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda
作者: SignalMedia
项目源码
文件源码
阅读 26
收藏 0
点赞 0
评论 0
def test_excel_sheet_by_name_raise(self):
    """Asking for a sheet by a name that does not exist raises ``xlrd.XLRDError``.

    A frame is written to a temporary file and read back by index ``0``;
    the string ``'0'`` is then requested as a sheet name and must fail.
    """
    _skip_if_no_xlrd()
    import xlrd

    with ensure_clean(self.ext) as path:
        written = DataFrame(np.random.randn(10, 2))
        written.to_excel(path)

        workbook = ExcelFile(path)
        read_back = read_excel(workbook, 0)
        tm.assert_frame_equal(written, read_back)

        # '0' is treated as a sheet *name*, not the integer index 0.
        with tm.assertRaises(xlrd.XLRDError):
            read_excel(workbook, '0')
def get_courses(year, semester, category, area, subarea):
    """Download one course-listing spreadsheet and return its rows as dicts.

    Args:
        year: Value stored under ``'year'`` and passed to ``search_form``.
        semester: Key into ``semester_name``; also passed to ``search_form``.
        category: Key into ``category_name``; also passed to ``search_form``.
        area: Mapping with ``'code'`` and ``'name'`` entries.
        subarea: Mapping with ``'code'`` and ``'name'`` entries.

    Returns:
        A list with one dict per sheet row from row index 3 onwards, or an
        empty list when the response body is empty or xlrd cannot open it.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response.
    """
    semester_label = semester_name[semester]
    category_label = category_name[category]
    area_label = "('{code}', '{name}')".format(code=area['code'], name=area['name'])
    subarea_label = "('{code}', '{name}')".format(code=subarea['code'], name=subarea['name'])
    logger.debug('Crawling {year}-{semester} {category}-{area}-{subarea}'.format(
        year=year,
        semester=semester_label,
        category=category_label,
        area=area_label,
        subarea=subarea_label
    ))

    payload = search_form(year, semester, category, area['code'], subarea['code'])
    response = requests.post(excel_url, payload)
    response.raise_for_status()
    if response.content == b'':
        return []

    try:
        workbook = xlrd.open_workbook(file_contents=response.content)
    except xlrd.XLRDError:
        logger.error('Cannot open file: maybe this is not accessable semester.')
        return []

    sheet = workbook.sheet_by_index(0)
    # Rows 0-2 are skipped; course rows start at index 3.
    return [
        {
            'year': year,
            'semester': semester_label,
            'code': sheet.cell_value(row, 5),
            'number': sheet.cell_value(row, 6),
            'title': sheet.cell_value(row, 7),
            'credit': sheet.cell_value(row, 9),
            'category': sheet.cell_value(row, 0),
            'language': sheet.cell_value(row, 19),
            'area': area['name'],
            'subarea': subarea['name'],
            'collage': sheet.cell_value(row, 1),
            'dept': sheet.cell_value(row, 2)
        }
        for row in range(3, sheet.nrows)
    ]
def __iter__(self):
    """Yield every row of the selected worksheet as a list.

    The worksheet is chosen by the URL's target segment: an integer is
    used as a sheet index, anything else as a sheet name, and an empty
    segment selects the first sheet.

    Raises:
        RowGeneratorError: if xlrd reports an error while selecting the sheet.
    """
    self.start()

    workbook = open_workbook(filename=self.url.path)
    segment = self.url.target_segment

    # NOTE: no check is made for a missing target segment, so a URL
    # without one silently falls back to the first worksheet.
    try:
        try:
            all_sheets = workbook.sheets()
            if self.url.target_segment:
                position = int(segment)
            else:
                position = 0
            sheet = all_sheets[position]
        except ValueError:
            # The segment is a worksheet name rather than a number.
            sheet = workbook.sheet_by_name(segment)
    except XLRDError as e:
        raise RowGeneratorError("Failed to open Excel workbook: '{}' ".format(e))

    for row_index in range(0, sheet.nrows):
        yield self.srow_to_list(row_index, sheet)

    self.finish()
def reader(stream, sheetname=None):
    """Yield the rows of one worksheet from an Excel workbook stream.

    With ``sheetname`` given, that sheet is read; if xlrd cannot find it
    the generator ends without yielding anything. Without ``sheetname``
    the workbook must contain exactly one sheet (unpacking fails
    otherwise).

    Each yielded row is a list of ``cell_value(cell, datemode)`` results.
    """
    workbook = xlrd.open_workbook(file_contents=stream.read())

    if sheetname is not None:
        try:
            sheet = workbook.sheet_by_name(sheetname)
        except xlrd.XLRDError:
            # Unknown sheet name: produce an empty iteration.
            return
    else:
        (sheet,) = workbook.sheets()

    datemode = sheet.book.datemode
    for row_index in range(sheet.nrows):
        yield [cell_value(cell, datemode) for cell in sheet.row(row_index)]