def create_model(self, model_input, vocab_size, num_frames, l2_penalty=1e-8, **unused_params):
num_extend = FLAGS.moe_num_extend
num_layers = 10
pool_size=2
cnn_input = model_input
num_filters=[256,256,512]
filter_sizes=[1,2,3]
features_size = sum(num_filters)
for layer in range(num_layers):
cnn_output, num_t = self.cnn(cnn_input, num_filters=num_filters, filter_sizes=filter_sizes, sub_scope="cnn%d"%(layer+1))
if layer < 3:
num_t = pool_size*(num_t//pool_size)
cnn_output = tf.reshape(cnn_output[:,:num_t,:],[-1,num_t//pool_size,pool_size,features_size])
cnn_input = tf.reduce_max(cnn_output, axis=2)
else:
cnn_input = cnn_output
cnn_output, num_t = self.kmax(cnn_input, num_filters=features_size, filter_sizes=num_extend, sub_scope="kmax")
cnn_input = tf.reshape(cnn_output,[-1,features_size])
final_probilities = self.sub_moe(cnn_input,vocab_size)
final_probilities = tf.reshape(final_probilities,[-1,num_extend,vocab_size])
weight2d = tf.get_variable("ensemble_weight2d",
shape=[num_extend, features_size, vocab_size],
regularizer=slim.l2_regularizer(1.0e-8))
weight = tf.nn.softmax(tf.einsum("aij,ijk->aik", cnn_output, weight2d), dim=1)
result = {}
result["predictions"] = tf.reduce_sum(final_probilities*weight,axis=1)
return result
评论列表
文章目录