def check_sent(s):
    """Scan tokenized rows and record which tokens the embedding model knows.

    Every token of every row in ``s`` is classified once:

    * a token that is not a string is printed and counted as a problem;
    * a token already present in ``inv_words`` or ``oov_words_in_train`` is
      skipped, so repeated words are never counted twice;
    * a string missing from ``word2vec`` is added to ``oov_words_in_train``
      and counted;
    * any other string is stored in ``inv_words`` together with the index
      reported by ``word2vec.vocab``.

    The function relies on three module-level names defined outside this
    block: ``inv_words`` (a mapping), ``oov_words_in_train`` (a set-like
    object with ``add``) and ``word2vec`` (supports ``in`` and ``.vocab``).
    Both ``inv_words`` and ``oov_words_in_train`` are mutated in place.

    Args:
        s: An iterable of rows, each row being an iterable of tokens.
            Rows are expected to be tokenized already; no tokenization
            happens here.

    Returns:
        The number of non-string tokens plus the number of words newly
        identified as out-of-vocabulary during this call.
    """
    count = 0
    for row in s:
        for w in row:
            # Non-string tokens (e.g. NaN placeholders) cannot be looked up;
            # report and count them, then move on.
            if not isinstance(w, str):
                print(w)
                count += 1
                continue
            # Already classified on an earlier occurrence.
            if w in inv_words or w in oov_words_in_train:
                continue
            if w not in word2vec:
                count += 1
                oov_words_in_train.add(w)
            else:
                inv_words[w] = word2vec.vocab[w].index
    return count
squad_dataset_exploration.py 文件源码
python
阅读 22
收藏 0
点赞 0
评论 0
评论列表
文章目录