def __init__(self, dictionary, embed_dim=512, max_positions=1024,
convolutions=((512, 3),) * 20, dropout=0.1):
super().__init__()
self.dictionary = dictionary
self.dropout = dropout
self.num_attention_layers = None
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
self.embed_positions = Embedding(max_positions, embed_dim, padding_idx)
in_channels = convolutions[0][0]
self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
self.projections = nn.ModuleList()
self.convolutions = nn.ModuleList()
for (out_channels, kernel_size) in convolutions:
pad = (kernel_size - 1) / 2
self.projections.append(Linear(in_channels, out_channels)
if in_channels != out_channels else None)
self.convolutions.append(
ConvTBC(in_channels, out_channels * 2, kernel_size, padding=pad,
dropout=dropout))
in_channels = out_channels
self.fc2 = Linear(in_channels, embed_dim)
评论列表
文章目录