"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "195bfd118a3a0e80c647161ca2e2c9af7db9a225"
Unverified commit 93624bfe, authored by Yih-Dar and committed by GitHub
Browse files

Fix non-negligible difference between GPT2 and TFGPT2 (#13679)


Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent a0c08aa3
...@@ -174,7 +174,7 @@ class TFMLP(tf.keras.layers.Layer): ...@@ -174,7 +174,7 @@ class TFMLP(tf.keras.layers.Layer):
nx = config.n_embd nx = config.n_embd
self.c_fc = TFConv1D(n_state, nx, initializer_range=config.initializer_range, name="c_fc") self.c_fc = TFConv1D(n_state, nx, initializer_range=config.initializer_range, name="c_fc")
self.c_proj = TFConv1D(nx, n_state, initializer_range=config.initializer_range, name="c_proj") self.c_proj = TFConv1D(nx, n_state, initializer_range=config.initializer_range, name="c_proj")
self.act = get_tf_activation("gelu") self.act = get_tf_activation(config.activation_function)
self.dropout = tf.keras.layers.Dropout(config.resid_pdrop) self.dropout = tf.keras.layers.Dropout(config.resid_pdrop)
def call(self, x, training=False): def call(self, x, training=False):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment