gen_functions.py 文件源码

python
阅读 20 收藏 0 点赞 0 评论 0

项目:quoll 作者: LanguageMachines 项目源码 文件源码
def excel2lines(file_name, sheet_indexes, header=False, annotation=False, date=False):
    """Read the rows of an Excel workbook into nested lists, one list per sheet.

    Args:
        file_name: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet_indexes: Iterable of sheet indexes (anything ``int()`` accepts)
            to read. When empty or ``None``, every sheet of the workbook is
            read.
        header: When true, the first row of each sheet is skipped.
        annotation: When true, each row is reduced to the cell values that
            parse as 0 or 1 (kept as floats); every row must then hold the
            same number of such values.
        date: Column index of a date cell to convert to ``datetime.date``
            (only used when ``annotation`` is false). The value is tested for
            truthiness, so the default ``False`` disables the conversion --
            and so does column index 0.

    Returns:
        A list with one entry per sheet; each entry is a list of rows and
        each row is a list of values (floats in annotation mode, text
        otherwise). Rows whose date cell cannot be converted are left out.

    Raises:
        SystemExit: In annotation mode, when a row holds a different number
            of annotation values than the rows before it.
    """
    # ``unicode`` only exists on Python 2 (or as an alias defined elsewhere in
    # this module); fall back to ``str`` so the text conversion works on
    # Python 3 as well.
    try:
        to_text = unicode
    except NameError:
        to_text = str

    workbook = xlrd.open_workbook(file_name)

    # Collect the requested sheets. This must be an if/else: a ``for ... else``
    # runs its else-branch whenever the loop finishes without ``break``, which
    # would always replace the selection with all sheets.
    print(sheet_indexes)
    if sheet_indexes:
        sheets = []
        for index in sheet_indexes:
            print(index)
            sheets.append(workbook.sheet_by_index(int(index)))
    else:
        sheets = workbook.sheets()

    first_row = 1 if header else 0
    lines = []
    # Number of annotation values per row, fixed by the first row that has
    # any; it is shared across all sheets.
    num_annotators = False
    for sheet in sheets:
        print("gen_functions", sheet.nrows)
        sheetlines = []
        for rownum in range(first_row, sheet.nrows):
            rowvals = sheet.row_values(rownum)
            if annotation:
                # Keep only the cells that hold a 0/1 annotation value.
                values = []
                for value in rowvals:
                    # Only text cells need stripping; numeric cells have no
                    # ``strip`` and are passed to float() unchanged.
                    if hasattr(value, "strip"):
                        value = value.strip()
                    try:
                        number = float(value)
                    except ValueError:
                        continue
                    if number in (0.0, 1.0):
                        values.append(number)
                if num_annotators:
                    if len(values) != num_annotators:
                        print("number of annotation values on line",
                            rownum, "is not consistent; check the inputfile. Exiting...")
                        # Same exception as exit(), but independent of the
                        # ``site`` module and with a non-zero exit status.
                        raise SystemExit(1)
                else:
                    num_annotators = len(values)
                    print(num_annotators, "annotators")
            else:
                if date:
                    try:
                        date_parts = xlrd.xldate_as_tuple(rowvals[date], workbook.datemode)
                        rowvals[date] = datetime.date(*date_parts[:3])
                    except Exception:
                        # Best effort: a row without a usable date is skipped.
                        continue
                values = [to_text(x) for x in rowvals]
            sheetlines.append(values)
        # each sheet is a list of lists
        lines.append(sheetlines)
    return lines
评论列表
文章目录


问题


面经


文章

微信
公众号

扫码关注公众号