def read_annotated_files(dirname):
    """Load labelled samples from every Excel workbook in *dirname*.

    Each file matching ``*.xls*`` directly inside *dirname* is read with
    ``pandas.read_excel``. Rows whose ``LABEL`` cell is null are skipped;
    for the remaining rows the ``text`` and ``LABEL`` columns are collected.
    A short progress line is printed for every file.

    Args:
        dirname: Directory to search for ``*.xls*`` files.

    Returns:
        A ``(messages, labels)`` tuple. ``messages`` is a list with the
        ``text`` value of every labelled row; ``labels`` is a 1-D integer
        NumPy array with the corresponding ``LABEL`` values in the same
        order. Both are empty when no matching file is found.
    """
    messages = []
    label_chunks = []

    # glob yields names in arbitrary, platform-dependent order; sorting keeps
    # the sample order reproducible between runs.
    for filename in sorted(glob.glob(os.path.join(dirname, '*.xls*'))):
        print('Reading %s' % filename, end='. ', flush=True)
        df = pd.read_excel(filename)

        # Select the labelled rows once and reuse the result below.
        labelled = df[df.LABEL.notnull()]
        print("Found %d new samples" % labelled.shape[0])

        label_chunks.append(np.array(labelled.LABEL.tolist(), dtype=int))
        messages.extend(labelled.text.tolist())

    if not label_chunks:
        # Keep the integer dtype even when there is nothing to concatenate.
        return messages, np.zeros(0, dtype=int)

    # A single concatenation avoids re-copying the growing array per file and
    # preserves the integer dtype (stacking onto a float array would not).
    return messages, np.concatenate(label_chunks)
评论列表
文章目录