def train_batch_score_sg(model, scored_word_sentences,
                         score_vector_size,
                         alpha=None, work=None,
                         sub_batch_size=256,
                         batch_size=256):
    """Generate skip-gram training batches paired with per-word scores.

    Repeatedly iterates over ``scored_word_sentences``. Each sentence is a
    sequence of ``[word, score]`` pairs. Words missing from ``model.vocab``
    are dropped, and the remaining words are randomly subsampled by
    comparing ``sample_int`` against ``model.random.rand() * 2**32``. For
    every (centre, context) pair inside a randomly reduced window, each
    non-None ``(x0, x1, y0)`` tuple produced by ``train_sg_pair`` is written
    into the next free slot of the batch buffers, together with the score of
    the centre word.

    Args:
        model: Object providing ``vocab``, ``index2word``, ``window`` and
            ``random`` (with ``rand()`` and ``randint()``).
        scored_word_sentences: Iterable of sentences; it is re-iterated on
            every pass of the outer loop.
        score_vector_size: Length of the last axis of the ``'score'`` array.
        alpha: Not used by this function.
        work: Not used by this function.
        sub_batch_size: Number of slots along the second axis of each array.
        batch_size: Number of rows along the first axis of each array.

    Yields:
        dict with keys ``'index'`` and ``'point'`` (int32 arrays of shape
        ``(batch_size, sub_batch_size)``), ``'code'`` (int8, same shape) and
        ``'score'`` (float32, shape
        ``(batch_size, sub_batch_size, score_vector_size)``). Every yielded
        dict holds fresh copies, so a consumer may keep or queue batches
        without them being overwritten by later ones.

    The generator ends (instead of looping forever without yielding) when a
    whole pass over ``scored_word_sentences`` produces no sentence at all,
    e.g. for an empty list or an already exhausted one-shot iterator.
    """
    batch_count = 0
    sub_batch_count = 0
    train_x0 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_x1 = np.zeros((batch_size, sub_batch_size), dtype='int32')
    train_y0 = np.zeros((batch_size, sub_batch_size), dtype='int8')
    train_y1 = np.zeros((batch_size, sub_batch_size, score_vector_size),
                        dtype='float32')

    while True:
        saw_sentence = False
        for scored_word_sentence in scored_word_sentences:
            saw_sentence = True
            # Keep in-vocabulary words only, then apply random subsampling.
            word_vocabs = [
                [model.vocab[w], s]
                for w, s in scored_word_sentence
                if w in model.vocab
                and model.vocab[w].sample_int > model.random.rand() * 2**32
            ]
            for pos, scored_word in enumerate(word_vocabs):
                # `b` in the original word2vec code
                reduced_window = model.random.randint(model.window)
                word = scored_word2word(scored_word)
                # Go over all words from the (reduced) window, predicting
                # each one in turn.
                start = max(0, pos - model.window + reduced_window)
                stop = pos + model.window + 1 - reduced_window
                for pos2, scored_word2 in enumerate(word_vocabs[start:stop],
                                                    start):
                    word2 = scored_word2word(scored_word2)
                    # Don't train on the `word` itself.
                    if pos2 == pos:
                        continue
                    xy_gen = train_sg_pair(
                        model, model.index2word[word.index], word2.index)
                    for xy in xy_gen:
                        if xy is None:
                            continue
                        x0, x1, y0 = xy
                        # The score always comes from the centre word.
                        y1 = scored_word2score(scored_word)
                        train_x0[batch_count][sub_batch_count] = x0
                        train_x1[batch_count][sub_batch_count] = x1
                        train_y0[batch_count][sub_batch_count] = y0
                        train_y1[batch_count][sub_batch_count] = y1
                        sub_batch_count += 1
                        if sub_batch_count >= sub_batch_size:
                            # Current row is full: move on to the next row.
                            batch_count += 1
                            sub_batch_count = 0
                            if batch_count >= batch_size:
                                # Hand out copies: the buffers are reused
                                # and overwritten for the next batch.
                                yield {'index': train_x0.copy(),
                                       'point': train_x1.copy(),
                                       'code': train_y0.copy(),
                                       'score': train_y1.copy()}
                                batch_count = 0
        if not saw_sentence:
            # Nothing to iterate over; another pass would not yield either.
            return
scoreword2veckeras.py 文件源码
python
阅读 23
收藏 0
点赞 0
评论 0
评论列表
文章目录