def __init__(self, source):
    """Load the word2vec model and the text at *source*, then build training windows.

    The text returned by ``load2(source)`` is split on single spaces. Every
    distinct word gets an integer code (starting at 1) and its embedding is
    looked up in the loaded word2vec model. The word sequence is then cut
    into windows of ``sd_len`` consecutive words, each paired with the word
    that immediately follows the window.

    Relies on names defined outside this method: ``gs``, ``np``, ``load2``,
    ``one_hot``, ``sd_len`` and the shared containers ``word_coding``,
    ``coded_word``, ``vec_values`` and ``coded_vector``, which are mutated
    in place.

    Sets on the instance:
        vmodel: the model loaded from ``'vectors.bin'``.
        x_d: array of windows, each a list of ``sd_len`` word vectors.
        y_d: array of ``one_hot(...)`` encodings of the word after each window.
        v_d: array of the word vector of the word after each window.
        i_d: array of windows, each a list of ``sd_len`` integer word codes.

    Args:
        source: passed straight to ``load2`` to obtain the text.
    """
    print("Loading in word2vec model")
    self.vmodel = gs.models.Word2Vec.load('vectors.bin')
    print("Loading in text")
    text = load2(source)
    parsed_words = text.split(" ")
    code_num = 1
    print("Creating word -> vector dictionary...")
    for word in parsed_words:
        if word not in word_coding:
            # First occurrence: assign the next code and cache the embedding.
            word_coding[word] = code_num
            coded_word[code_num] = word
            code_num += 1
            vec_values[word] = self.vmodel[word]
        coded_vector.append(word_coding[word])
    print('Number of distinct words: ', len(word_coding))
    sd_size = int(len(coded_vector) / sd_len)

    # Four *separate* accumulators. The previous chained assignment
    # (`x_d = y_d = v_d = i_d = []`) aliased one list under lower-case names
    # that were never used, leaving the lists appended to below undefined
    # in this scope.
    x_D, y_D, v_D, i_D = [], [], [], []
    for idx in range(sd_size - 1):
        for iidx in range(sd_len - 1):
            # Window covers [start, end); the prediction target sits at `end`.
            start = idx * sd_len + iidx
            end = start + sd_len
            i_D.append(coded_vector[start:end])
            x_D.append([vec_values[my_word] for my_word in parsed_words[start:end]])
            y_D.append(one_hot(coded_vector[end], word_coding))
            v_D.append(vec_values[parsed_words[end]])
    self.x_d = np.asarray(x_D)
    self.y_d = np.asarray(y_D)
    self.v_d = np.asarray(v_D)
    self.i_d = np.asarray(i_D)
    print('shapes: ' + str(self.x_d.shape))
lstm_trainer_generator.py 文件源码
python
阅读 26
收藏 0
点赞 0
评论 0
评论列表
文章目录