def _binary_annotations(cells):
    """Return the cells of one row that parse as 0 or 1, as floats."""
    found = []
    for cell in cells:
        # Text cells may carry surrounding whitespace; numeric cells have no
        # strip() method, so only strip what can be stripped.
        if hasattr(cell, "strip"):
            cell = cell.strip()
        try:
            number = float(cell)
        except (TypeError, ValueError):
            # Not a number at all (e.g. free text): not an annotation value.
            continue
        if number in (0.0, 1.0):
            found.append(number)
    return found


def excel2lines(file_name, sheet_indexes, header=False, annotation=False, date=False):
    """Read rows from an Excel workbook, grouped per sheet.

    Args:
        file_name: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_indexes: Iterable of sheet indexes (each converted with
            ``int``) to read. When empty or otherwise falsy, every sheet of
            the workbook is read.
        header: When true, the first row of each sheet is skipped.
        annotation: When true, each row is reduced to the cell values that
            parse as 0 or 1 (returned as floats), and every row must yield
            the same number of such values.
        date: Column index of a cell holding an Excel date, converted to a
            ``datetime.date``. Only used when ``annotation`` is false. A
            falsy value (including column 0) disables the conversion.

    Returns:
        A list with one entry per sheet; each entry is a list of rows and
        each row is a list of values (floats in annotation mode, text
        otherwise).

    Raises:
        SystemExit: In annotation mode, when a row has a different number
            of annotation values than the rows before it.

    Progress and diagnostic information is printed to standard output.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere so
    # the text conversion below works on both.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str

    workbook = xlrd.open_workbook(file_name)

    # Collect the sheets. This must be an if/else: a ``for ... else`` would
    # run the fallback after every completed loop and discard the selection.
    print(sheet_indexes)
    if sheet_indexes:
        sheets = []
        for index in sheet_indexes:
            print(index)
            sheets.append(workbook.sheet_by_index(int(index)))
    else:
        sheets = workbook.sheets()

    first_row = 1 if header else 0
    # Stays falsy until a row with at least one annotation value is seen;
    # the count is shared across all sheets.
    num_annotators = False
    lines = []
    for sheet in sheets:
        print("gen_functions", sheet.nrows)
        sheetlines = []
        for rownum in range(first_row, sheet.nrows):
            if annotation:
                values = _binary_annotations(sheet.row_values(rownum))
                if not num_annotators:
                    num_annotators = len(values)
                    print(num_annotators, "annotators")
                elif len(values) != num_annotators:
                    print("number of annotation values on line",
                          rownum, "is not consistent; check the inputfile. Exiting...")
                    raise SystemExit
            else:
                rowvals = sheet.row_values(rownum)
                if date:
                    try:
                        date_parts = xlrd.xldate_as_tuple(
                            sheet.cell_value(rownum, date), workbook.datemode)
                        rowvals[date] = datetime.date(*date_parts[:3])
                    except Exception:
                        # Best effort: a row whose date cell cannot be
                        # converted is left out of the result.
                        continue
                values = [text_type(x) for x in rowvals]
            sheetlines.append(values)
        # Each sheet is a list of rows, each row a list of values.
        lines.append(sheetlines)
    return lines
评论列表
文章目录