def cut_Dataset(data_set, parrel=False, nomial=False):
    """
    Cut (tokenize) every document of ``data_set`` and store the results back
    on ``data_set.data``.

    :param data_set: object whose ``data`` attribute is an iterable of
        documents; the attribute is replaced by a list holding the
        ``cut_Text`` result for each document, in the original order
    :param parrel: if True, cut the documents with a pool of 9 workers
        (the original note states this mode is not available on Windows);
        otherwise cut them one by one while showing a tqdm progress bar
    :param nomial: forwarded to ``cut_Text``; per the original note, only
        noun-like words remain when it is True
    :return: the same ``data_set`` object, after ``data`` has been replaced
    """
    from tqdm import tqdm

    start = time.time()
    print('cuting dataset......')
    if parrel:
        p = ThreadPool(9)
        try:
            # Keep the mapped results: previously they were thrown away, so
            # the dataset ended up empty, and ``nomial`` was never forwarded.
            # NOTE(review): the lambda assumes ThreadPool is thread-based
            # (no pickling of the callable) -- confirm against the import.
            data_cut = p.map(
                lambda doc_content: cut_Text(doc_content, nomial),
                data_set.data,
            )
        finally:
            # Always release the workers, even when cutting fails.
            p.close()
            p.join()
    else:
        data_cut = [
            cut_Text(doc_content, nomial)
            for doc_content in tqdm(data_set.data)
        ]
    end = time.time()
    print('cuting runs %0.2f seconds.' % (end - start))
    data_set.data = data_cut
    return data_set
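# 评论列表 (comment list) -- web page residue, not part of the code
# 文章目录 (table of contents) -- web page residue, not part of the code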