def get_pattern(DB, tr, te, patterns):
    """Build regex match-count features from the ``Variation`` column.

    For every pattern ``patterns[i]`` a column ``'p<i>'`` is written onto each
    frame that is processed; it holds the number of non-overlapping matches
    of that pattern in the lower-cased string form of ``Variation``.

    Args:
        DB: Object exposing a ``data`` mapping. The keys read here are
            ``'training_variants'``, ``'test_variants_filter'`` and
            ``'stage2_test_variants'``; each value is indexed like a pandas
            DataFrame with a ``'Variation'`` column.
        tr: ``None`` to get whole train/test matrices. Otherwise an indexer
            applied to the rows of the training feature matrix.
        te: When ``tr`` is ``None``: ``'stage1'`` selects training vs.
            ``'test_variants_filter'``; any other value selects
            (training + ``'test_variants_filter'``) vs.
            ``'stage2_test_variants'``. When ``tr`` is not ``None``: an
            indexer applied to the rows of the training feature matrix.
        patterns: Iterable of regular expressions (strings or compiled
            patterns).

    Returns:
        A pair of arrays ``(train_features, test_features)`` with one column
        per pattern.

    Side effects:
        The ``'p<i>'`` columns are assigned on the frames taken from
        ``DB.data``. In the non-``'stage1'`` case the training frame is a
        fresh ``pd.concat`` result, so the stored training and
        ``'test_variants_filter'`` frames are not modified in that case.
    """
    # Compile once up front; this also lets ``patterns`` be a one-shot iterable.
    compiled = [re.compile(p) for p in patterns]
    cols = ['p%d' % c for c in range(len(compiled))]

    if tr is None:
        if te == 'stage1':
            train = DB.data['training_variants']
            test = DB.data['test_variants_filter']
        else:
            train = pd.concat(
                [DB.data['training_variants'], DB.data['test_variants_filter']],
                axis=0,
            )
            test = DB.data['stage2_test_variants']
        frames = [train, test]
    else:
        train = DB.data['training_variants']
        test = None
        frames = [train]

    for frame in frames:
        # Lower-case once per frame rather than once per (pattern, row).
        lowered = frame['Variation'].apply(lambda value: str(value).lower())
        for col, regex in zip(cols, compiled):
            frame[col] = lowered.apply(
                lambda text, regex=regex: len(regex.findall(text))
            )

    if tr is None:
        return train[cols].values, test[cols].values

    # Both splits are row selections of the same training feature matrix.
    X = train[cols].values
    return X[tr], X[te]
# (web page residue) Comment list
# (web page residue) Table of contents