From b5b44e57fc5a7c4a4d2ce5d412c1eecf1e8276ff Mon Sep 17 00:00:00 2001 From: dshwei <42167236+dshwei@users.noreply.github.com> Date: Wed, 23 Sep 2020 14:47:32 +0800 Subject: [PATCH 1/2] transformer.py: wrap the feed_forward call in a lambda in forward() --- bert_pytorch/model/transformer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bert_pytorch/model/transformer.py b/bert_pytorch/model/transformer.py index 288de26..d969e9f 100644 --- a/bert_pytorch/model/transformer.py +++ b/bert_pytorch/model/transformer.py @@ -27,5 +27,6 @@ def __init__(self, hidden, attn_heads, feed_forward_hidden, dropout): def forward(self, x, mask): x = self.input_sublayer(x, lambda _x: self.attention.forward(_x, _x, _x, mask=mask)) - x = self.output_sublayer(x, self.feed_forward) +# x = self.output_sublayer(x, self.feed_forward) + x = self.output_sublayer(x, lambda _x: self.feed_forward.forward(_x)) return self.dropout(x) From e29b4baa9f6dc53d5df5f96acd5133ca6f3f8447 Mon Sep 17 00:00:00 2001 From: dshwei <42167236+dshwei@users.noreply.github.com> Date: Wed, 23 Sep 2020 14:49:00 +0800 Subject: [PATCH 2/2] sublayer.py: apply norm after the residual connection instead of before the sublayer --- bert_pytorch/model/utils/sublayer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bert_pytorch/model/utils/sublayer.py b/bert_pytorch/model/utils/sublayer.py index 6e36793..486da44 100644 --- a/bert_pytorch/model/utils/sublayer.py +++ b/bert_pytorch/model/utils/sublayer.py @@ -15,4 +15,6 @@ def __init__(self, size, dropout): def forward(self, x, sublayer): "Apply residual connection to any sublayer with the same size." - return x + self.dropout(sublayer(self.norm(x))) +# return x + self.dropout(sublayer(self.norm(x))) +# first residual connection and then layernorm + return self.norm(x + self.dropout(sublayer(x)))