def split_to_test_and_train(data, labels, entities, test_size=DEFAULT_TEST_SIZE):
    """Split samples into train and test parts and remove train/test overlap.

    The three parallel sequences are split together by ``train_test_split``.
    Afterwards every test sample whose data item also occurs in the training
    data is dropped (together with its label and entity), so the returned
    test part contains no item that also appears in the train part.

    Args:
        data: Sequence of samples. Items must be hashable, because they are
            used for set membership checks.
        labels: Sequence of labels, parallel to ``data``.
        entities: Sequence of entities, parallel to ``data``.
        test_size: Passed through unchanged to ``train_test_split``.

    Returns:
        A tuple ``(d_train, d_test, l_train, l_test, c_train, c_test)``.
        The train parts are exactly what ``train_test_split`` produced; the
        test parts are new lists with the overlapping samples removed, so the
        test part may be smaller than ``test_size`` requested.
    """
    # NOTE(review): train_test_split is presumably scikit-learn's -- confirm
    # against the file's imports.
    d_train, d_test, l_train, l_test, c_train, c_test = train_test_split(
        data, labels, entities, test_size=test_size)

    # A set gives the same O(1) membership test as the former dict of dummy
    # values, and ``in`` works on both Python 2 and 3 (``dict.has_key`` was
    # removed in Python 3).
    train_items = set(d_train)

    d_test_2 = []
    l_test_2 = []
    c_test_2 = []
    for d, l, c in zip(d_test, l_test, c_test):
        if d in train_items:
            # Same item already present in the training data: skip it so the
            # test part does not overlap with the train part.
            continue
        d_test_2.append(d)
        l_test_2.append(l)
        c_test_2.append(c)

    return (d_train, d_test_2, l_train, l_test_2, c_train, c_test_2)
# utility to extract entities from preprocessed files
data_preparation_tools.py 文件源码
python
阅读 25
收藏 0
点赞 0
评论 0
评论列表
文章目录