def add_list_of_words_in_w2v_model(self, unknown_words):
    """Append vectors for ``unknown_words`` to the current word2vec text model.

    The file at ``self.w2v_huge_model_path`` is scanned line by line; every
    line whose first whitespace-separated token is one of the requested words
    is appended verbatim to the file at ``self.w2v_model_path``. Scanning
    stops as soon as every requested word has been found.

    Words that never show up in the huge model get a random vector of
    ``self.w2v_model.vector_size`` components scaled to the range [-1, 1),
    written as ``word v1 v2 ...`` on its own line, and a warning is printed
    for each of them.

    Args:
        unknown_words: any iterable of words (set, list, ...). It is copied
            into a private set, so duplicates are ignored and the caller's
            object is never modified.

    Note:
        ``random`` must already be in scope at module level and, when called
        with a size, return an iterable of floats in [0, 1) -- presumably
        ``numpy.random.random``; confirm against the file's imports.
    """
    # Private working copy: makes list input valid, collapses duplicates and
    # lets us drop found words in O(1) without rebuilding the set each time.
    remaining = set(unknown_words)

    # ``with`` guarantees both files are closed even if a line fails to parse
    # or a write raises.
    with open(self.w2v_huge_model_path, "r") as huge_w2v_model_file, \
            open(self.w2v_model_path, "a") as current_w2v_model_file:
        for line in huge_w2v_model_file:
            if not remaining:
                # Everything requested was found; skip the rest of the file.
                break
            fields = line.split(None, 1)
            if not fields:
                # Blank line: there is no word token to compare.
                continue
            word = fields[0]
            if word in remaining:
                # The last line of a file may lack its terminator; add it so
                # the next appended entry does not end up on the same line.
                if not line.endswith("\n"):
                    line += "\n"
                current_w2v_model_file.write(line)
                remaining.discard(word)

        # Sorted only to make the order of the appended rows reproducible.
        for word in sorted(remaining):
            random_position = random(self.w2v_model.vector_size) * 2 - 1
            # One entry per line: the newline keeps consecutive fallback
            # vectors from being glued together into a single corrupt row.
            current_w2v_model_file.write(
                " ".join([word] + [str(x) for x in random_position]) + "\n")
            # Single-argument call form prints identically on Python 2 and 3.
            print("warning random positions introduced for new words ... in the future this should be solved")
评论列表
文章目录