"...resnet50_tensorflow.git" did not exist on "0886b384e7d78c56bb23b5be41e17e5447231eca"
Commit 80607874 authored by thomwolf

fix layer norm epsilon in OpenAI GPT

parent 7b4b0cf9
@@ -141,6 +141,7 @@ class OpenAIGPTConfig(object):
                  resid_pdrop=0.1,
                  embd_pdrop=0.1,
                  attn_pdrop=0.1,
+                 layer_norm_epsilon=1e-5,
                  initializer_range=0.02,
                  ):
         """Constructs OpenAIGPTConfig.
@@ -161,6 +162,7 @@ class OpenAIGPTConfig(object):
             attn_pdrop: The dropout ratio for the attention
                 probabilities.
             embd_pdrop: The dropout ratio for the embeddings.
+            layer_norm_epsilon: epsilon to use in the layer norm layers
             initializer_range: The sttdev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
@@ -182,6 +184,7 @@ class OpenAIGPTConfig(object):
             self.resid_pdrop = resid_pdrop
             self.embd_pdrop = embd_pdrop
             self.attn_pdrop = attn_pdrop
+            self.layer_norm_epsilon = layer_norm_epsilon
             self.initializer_range = initializer_range
         else:
             raise ValueError(
@@ -318,9 +321,9 @@ class Block(nn.Module):
         super(Block, self).__init__()
         nx = config.n_embd
         self.attn = Attention(nx, n_ctx, config, scale)
-        self.ln_1 = LayerNorm(nx)
+        self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
         self.mlp = MLP(4 * nx, config)
-        self.ln_2 = LayerNorm(nx)
+        self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)

     def forward(self, x):
         a = self.attn(x)
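For context, a minimal sketch (not part of the commit) of how the new layer_norm_epsilon setting flows from the config into the layer norm modules. SimpleConfig below is a stand-in for OpenAIGPTConfig with only the fields used here, and the sketch uses PyTorch's built-in nn.LayerNorm, whose eps argument also defaults to 1e-5.

import torch
import torch.nn as nn

class SimpleConfig:
    """Stand-in for OpenAIGPTConfig, reduced to the fields used in this sketch."""
    def __init__(self, n_embd=768, layer_norm_epsilon=1e-5):
        self.n_embd = n_embd
        self.layer_norm_epsilon = layer_norm_epsilon

config = SimpleConfig()

# After this commit, each Block builds its layer norms with the epsilon taken
# from the config rather than relying on the module's hard-coded default.
ln_1 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)

x = torch.randn(2, 5, config.n_embd)  # (batch, sequence, hidden)
print(ln_1(x).shape)                  # torch.Size([2, 5, 768])

Making the epsilon configurable lets a checkpoint's normalization behavior be reproduced exactly instead of silently depending on whatever default the LayerNorm implementation ships with.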