def get_text_len(DB, tr, te):
    """Build two text-length features per row of the ``'Text'`` column.

    The features are, in column order:

    * ``tl``  -- ``len(text)`` (character count when the cell is a ``str``).
    * ``tl2`` -- ``len(text.split())``, i.e. number of whitespace-separated
      tokens.

    The function works in two modes, selected by ``tr``:

    * ``tr is None`` (train/test mode): ``te`` picks the data sets.
      With ``te == 'stage1'`` the train frame is ``DB.data['training_text']``
      and the test frame is ``DB.data['test_text_filter']``. With any other
      ``te`` the train frame is the row-wise concatenation of those two
      frames and the test frame is ``DB.data['stage2_test_text']``.
    * ``tr is not None`` (split mode): features are computed for
      ``DB.data['training_text']`` only, and ``tr`` / ``te`` are used to
      index rows of the resulting array (presumably fold index arrays --
      confirm against the caller).

    Side effect: the ``'tl'`` and ``'tl2'`` columns are written onto every
    frame that is processed. Except for the concatenated frame (a new object
    returned by ``pd.concat``), these are the frames stored in ``DB.data``.

    Args:
        DB: Object exposing a ``data`` mapping of frames that each have a
            ``'Text'`` column. Cells are assumed to be strings; a value
            without ``len``/``split`` (e.g. NaN) raises, as before.
        tr: ``None`` for train/test mode, otherwise a row indexer.
        te: ``'stage1'`` or another stage label in train/test mode,
            otherwise a row indexer.

    Returns:
        A 2-tuple of arrays with columns ``[tl, tl2]``: (train, test).
    """
    feature_cols = ['tl', 'tl2']
    training = DB.data['training_text']

    # Select the frames to featurize for the requested mode.
    if tr is None:
        if te == 'stage1':
            frames = [training, DB.data['test_text_filter']]
        else:
            # Later stage: the stage-1 test rows are folded into the train set.
            merged = pd.concat([training, DB.data['test_text_filter']], axis=0)
            frames = [merged, DB.data['stage2_test_text']]
    else:
        frames = [training]

    # Both columns must be filled for every frame before anything is read back.
    for frame in frames:
        frame['tl'] = frame['Text'].apply(len)
        frame['tl2'] = frame['Text'].apply(lambda text: len(text.split()))

    if tr is None:
        train_frame, test_frame = frames
        return train_frame[feature_cols].values, test_frame[feature_cols].values

    features = frames[0][feature_cols].values
    return features[tr], features[te]
评论列表
文章目录