Source code for fastNLP.modules.encoder.transformer
r"""undocumented"""
__all__ = [
"TransformerEncoder"
]
from torch import nn
from .seq2seq_encoder import TransformerSeq2SeqEncoderLayer
class TransformerEncoder(nn.Module):
    r"""
    Transformer encoder module; does not include the embedding layer.
    """
    def __init__(self, num_layers, d_model=512, n_head=8, dim_ff=2048, dropout=0.1):
        """
        :param int num_layers: number of Transformer layers
        :param int d_model: size of the input and output representations
        :param int n_head: number of attention heads
        :param int dim_ff: hidden size of the intermediate FFN layer
        :param float dropout: dropout probability applied to the attention weights and the intermediate FFN representation
        """
        super(TransformerEncoder, self).__init__()
        self.layers = nn.ModuleList([TransformerSeq2SeqEncoderLayer(d_model=d_model, n_head=n_head,
                                                                     dim_ff=dim_ff, dropout=dropout)
                                     for _ in range(num_layers)])
        self.norm = nn.LayerNorm(d_model, eps=1e-6)
    def forward(self, x, seq_mask=None):
        r"""
        :param x: [batch, seq_len, model_size] input sequence
        :param seq_mask: [batch, seq_len] padding mask for the input sequence; if ``None``, an all-ones mask is
            generated. Positions with value 1 are attended to. Default: ``None``
        :return: [batch, seq_len, model_size] output sequence
        """
        output = x
        if seq_mask is None:
            seq_mask = x.new_ones(x.size(0), x.size(1)).bool()
        for layer in self.layers:
            output = layer(output, seq_mask)
        return self.norm(output)
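

# Minimal usage sketch (not part of the fastNLP source): it shows how the encoder
# above can be instantiated and applied, following the shapes documented in the
# docstrings. The batch size, sequence length, and variable names are illustrative
# assumptions, not values prescribed by the library.
if __name__ == "__main__":
    import torch

    encoder = TransformerEncoder(num_layers=2, d_model=512, n_head=8, dim_ff=2048, dropout=0.1)
    x = torch.randn(4, 10, 512)            # [batch=4, seq_len=10, model_size=512]
    seq_mask = torch.ones(4, 10).bool()    # True marks positions to attend to (no padding here)
    out = encoder(x, seq_mask=seq_mask)
    print(out.shape)                       # torch.Size([4, 10, 512])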