ModelZoo / ResNet50_tensorflow

Commit a0cd17e0
Authored Jun 26, 2022 by Hongkun Yu; committed by A. Unique TensorFlower, Jun 26, 2022

Internal change

PiperOrigin-RevId: 457383160
Parent: a5bbb547
Showing 3 changed files with 28 additions and 24 deletions.
official/nlp/modeling/layers/gated_feedforward.py       +21 -17
official/nlp/modeling/layers/gated_feedforward_test.py   +6  -6
official/nlp/modeling/layers/rezero_transformer.py       +1  -1
official/nlp/modeling/layers/gated_feedforward.py
@@ -19,6 +19,7 @@ import gin
 import tensorflow as tf
 
 from official.modeling import tf_utils
+from official.nlp.modeling.layers import util
 
 
 @tf.keras.utils.register_keras_serializable(package="Text")
@@ -57,9 +58,9 @@ class GatedFeedforward(tf.keras.layers.Layer):
   """
 
   def __init__(self,
-               intermediate_size,
-               intermediate_activation,
-               dropout,
+               inner_dim=768,
+               inner_activation=tf_utils.get_activation("gelu"),
+               dropout=0.0,
                use_gate=True,
                apply_output_layer_norm=True,
                num_blocks=1,
@@ -72,9 +73,12 @@ class GatedFeedforward(tf.keras.layers.Layer):
                kernel_constraint=None,
                bias_constraint=None,
                **kwargs):
-    super(GatedFeedforward, self).__init__(**kwargs)
-    self._intermediate_size = intermediate_size
-    self._intermediate_activation = intermediate_activation
+    inner_dim = kwargs.pop("intermediate_size", inner_dim)
+    inner_activation = kwargs.pop("intermediate_activation", inner_activation)
+    util.filter_kwargs(kwargs)
+    super().__init__(**kwargs)
+    self._inner_dim = inner_dim
+    self._inner_activation = inner_activation
     self._dropout = dropout
     self._use_gate = use_gate
     self._num_blocks = num_blocks
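This hunk is the heart of the change: the old intermediate_size / intermediate_activation arguments become inner_dim / inner_activation, and callers using the old names keep working because the deprecated keys are popped out of **kwargs before util.filter_kwargs strips anything the base Layer would reject. A minimal, self-contained sketch of the aliasing pattern; MyLayer is hypothetical, and util.filter_kwargs is elided since it is internal to the Model Garden:

import tensorflow as tf

class MyLayer(tf.keras.layers.Layer):
  """Toy layer showing the deprecated-kwarg aliasing used above."""

  def __init__(self, inner_dim=768, **kwargs):
    # A caller still using the old name lands in **kwargs; pop it so the
    # old spelling overrides the new-style default.
    inner_dim = kwargs.pop("intermediate_size", inner_dim)
    super().__init__(**kwargs)
    self._inner_dim = inner_dim

# Both spellings configure the same layer:
assert MyLayer(inner_dim=128)._inner_dim == 128
assert MyLayer(intermediate_size=128)._inner_dim == 128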
@@ -103,7 +107,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
         kernel_constraint=self._kernel_constraint,
         bias_constraint=self._bias_constraint)
     self._intermediate_dense = []
-    self._intermediate_activation_layers = []
+    self._inner_activation_layers = []
     self._gate_dense = []
     self._output_dense = []
     self._output_dropout = []
@@ -118,7 +122,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
       self._intermediate_dense.append(
           tf.keras.layers.EinsumDense(
               "abc,cd->abd",
-              output_shape=(None, self._intermediate_size),
+              output_shape=(None, self._inner_dim),
               bias_axes="d",
               name="intermediate_%d" % i,
               kernel_initializer=tf_utils.clone_initializer(
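The projection being renamed here is a tf.keras.layers.EinsumDense with equation "abc,cd->abd", i.e. a dense layer applied over the last axis of a [batch, seq, width] tensor. A short illustration of what the output_shape argument (now fed from inner_dim) controls; the 128/768 sizes are illustrative only:

import tensorflow as tf

# "abc,cd->abd": a=batch, b=sequence, c=input width, d=output width.
# output_shape gives the non-batch output dims; None keeps the sequence
# length dynamic.
dense = tf.keras.layers.EinsumDense(
    "abc,cd->abd", output_shape=(None, 128), bias_axes="d")

x = tf.ones([2, 16, 768])  # [batch, seq_len, hidden]
print(dense(x).shape)      # (2, 16, 128) -- d is the inner_dim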
@@ -126,14 +130,14 @@ class GatedFeedforward(tf.keras.layers.Layer):
               bias_initializer=tf_utils.clone_initializer(
                   self._bias_initializer),
               **common_kwargs))
-      self._intermediate_activation_layers.append(
+      self._inner_activation_layers.append(
           tf.keras.layers.Activation(
-              self._intermediate_activation, dtype=activation_policy))
+              self._inner_activation, dtype=activation_policy))
       if self._use_gate:
         self._gate_dense.append(
             tf.keras.layers.EinsumDense(
                 "abc,cd->abd",
-                output_shape=(None, self._intermediate_size),
+                output_shape=(None, self._inner_dim),
                 bias_axes="d",
                 name="gate_%d" % i,
                 kernel_initializer=tf_utils.clone_initializer(
@@ -164,10 +168,10 @@ class GatedFeedforward(tf.keras.layers.Layer):
   def get_config(self):
     config = {
-        "intermediate_size":
-            self._intermediate_size,
-        "intermediate_activation":
-            self._intermediate_activation,
+        "inner_dim":
+            self._inner_dim,
+        "inner_activation":
+            self._inner_activation,
         "dropout":
             self._dropout,
         "use_gate":
@@ -191,7 +195,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
         "bias_constraint":
             tf.keras.constraints.serialize(self._bias_constraint)
     }
-    base_config = super(GatedFeedforward, self).get_config()
+    base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
 
   def call(self, inputs):
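get_config now serializes the new argument names (matching what __init__ accepts, so deserialization round-trips), and the Python 2-style super(GatedFeedforward, self) call becomes a bare super(). A hedged sketch of why the merged dict matters, using a hypothetical toy layer:

import tensorflow as tf

class Scale(tf.keras.layers.Layer):
  """Hypothetical layer with the same get_config structure."""

  def __init__(self, factor=2.0, **kwargs):
    super().__init__(**kwargs)
    self._factor = factor

  def call(self, inputs):
    return inputs * self._factor

  def get_config(self):
    config = {"factor": self._factor}
    base_config = super().get_config()  # name, trainable, dtype, ...
    return dict(list(base_config.items()) + list(config.items()))

layer = Scale(factor=3.0, name="scale")
clone = Scale.from_config(layer.get_config())  # full round-trip
assert clone._factor == 3.0 and == "scale"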
@@ -199,7 +203,7 @@ class GatedFeedforward(tf.keras.layers.Layer):
     for i in range(self._num_blocks):
       layer_input = layer_output
       intermediate_output = self._intermediate_dense[i](layer_input)
-      intermediate_output = self._intermediate_activation_layers[i](
+      intermediate_output = self._inner_activation_layers[i](
           intermediate_output)
       if self._use_gate:
         gated_linear = self._gate_dense[i](layer_input)
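In call, the activation layers pick up their new name; the visible context also shows the layer's gating path, where a parallel linear projection of the block input modulates the activated projection, GLU-style. A simplified sketch of that computation, consistent with the hunk above (the exact combination continues past the lines shown, so treat this as an assumption):

import tensorflow as tf

def gated_block(x, dense, gate, activation=tf.nn.relu):
  """GLU-style feedforward: elementwise gate over the activated projection."""
  hidden = activation(dense(x))  # [batch, seq, inner_dim]
  return gate(x) * hidden        # gate projects to the same width

dense = tf.keras.layers.EinsumDense(
    "abc,cd->abd", output_shape=(None, 128), bias_axes="d")
gate = tf.keras.layers.EinsumDense(
    "abc,cd->abd", output_shape=(None, 128), bias_axes="d")

y = gated_block(tf.ones([2, 4, 32]), dense, gate)
print(y.shape)  # (2, 4, 128)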
official/nlp/modeling/layers/gated_feedforward_test.py
@@ -44,8 +44,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
   def test_layer_creation(self, use_gate, num_blocks, dropout_position, dtype):
     tf.keras.mixed_precision.set_global_policy(dtype)
     kwargs = dict(
-        intermediate_size=128,
-        intermediate_activation="relu",
+        inner_dim=128,
+        inner_activation="relu",
         dropout=0.1,
         use_gate=use_gate,
         num_blocks=num_blocks,
@@ -76,8 +76,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
                              dtype):
     tf.keras.mixed_precision.set_global_policy(dtype)
     kwargs = dict(
-        intermediate_size=16,
-        intermediate_activation="relu",
+        inner_dim=16,
+        inner_activation="relu",
         dropout=0.1,
         use_gate=use_gate,
         num_blocks=num_blocks,
@@ -104,8 +104,8 @@ class GatedFeedforwardTest(keras_parameterized.TestCase):
 
   def test_serialize_deserialize(self):
     kwargs = dict(
-        intermediate_size=16,
-        intermediate_activation="relu",
+        inner_dim=16,
+        inner_activation="relu",
         dropout=0.1,
         use_gate=False,
         num_blocks=4,
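The test changes are mechanical renames of the constructor kwargs, but note that each test first sets a global mixed-precision policy, which is what the dtype=activation_policy argument on the layer's Activation objects interacts with. A small illustration of what the policy changes, using a plain Dense layer rather than the layer under test:

import tensorflow as tf

tf.keras.mixed_precision.set_global_policy("mixed_float16")
dense = tf.keras.layers.Dense(4)
print(dense.compute_dtype)  # float16: the math runs in half precision
print(dense.dtype)          # float32: variables stay in full precision

# Reset so later code/tests see the default policy again.
tf.keras.mixed_precision.set_global_policy("float32")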
official/nlp/modeling/layers/rezero_transformer.py
@@ -76,7 +76,7 @@ class ReZeroTransformer(tf.keras.layers.Layer):
                                         attention_dropout_rate)
     dropout_rate = kwargs.pop("output_dropout", dropout_rate)
     inner_dim = kwargs.pop("intermediate_size", inner_dim)
-    inner_activation = kwargs.pop("inner_activation", inner_activation)
+    inner_activation = kwargs.pop("intermediate_activation", inner_activation)
     util.filter_kwargs(kwargs)
     super().__init__(**kwargs)
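The one-line change in ReZeroTransformer is a bug fix in the same aliasing pattern: the old code popped "inner_activation", the current parameter name, which never appears in **kwargs as an alias, so a caller passing intermediate_activation= was silently ignored and then discarded by util.filter_kwargs. Popping the deprecated key restores the intended override. In plain Python:

# Before the fix: pops the *new* name, which is a real parameter and
# never lands in **kwargs, so the default always wins.
kwargs = {"intermediate_activation": "relu"}
inner_activation = "gelu"
inner_activation = kwargs.pop("inner_activation", inner_activation)
print(inner_activation)  # gelu -- the caller's "relu" was ignored

# After the fix: pops the *deprecated* name, so old callers still work.
kwargs = {"intermediate_activation": "relu"}
inner_activation = "gelu"
inner_activation = kwargs.pop("intermediate_activation", inner_activation)
print(inner_activation)  # relu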