def load_data(word_voc, char_voc, path):
    """Read a whitespace-tokenized text file into a list of token arrays.

    Each line of the file is stripped, split on whitespace, and stored as
    one ``np.array`` of token strings. A progress bar is shown while the
    file is read.

    Args:
        word_voc: Currently unused. Kept so existing callers keep working;
            the word-to-index conversion that consumed it was disabled in
            the original code.
        char_voc: Currently unused, for the same reason as ``word_voc``.
        path: Path of the text file to read.

    Returns:
        A list with one ``np.array`` of token strings per line, in file
        order. A blank line contributes an empty array.
    """
    # The line count is computed up front only to give the progress bar a
    # maximum value (presumably count_lines returns the number of lines in
    # the file -- it is defined outside this block).
    n_lines = count_lines(path)
    bar = progressbar.ProgressBar()
    print('loading...: %s' % path)
    with open(path) as f:
        data = [
            np.array(line.strip().split())
            for line in bar(f, max_value=n_lines)
        ]
    return data
评论列表
文章目录