ModelZoo / ResNet50_tensorflow · Commits

Commit 6d52239a
Authored Aug 08, 2020 by xinliupitt

    initializer func in model

Parent: 0b395f65

Showing 2 changed files with 8 additions and 40 deletions (+8 −40):

    official/modeling/activations/attention_initializer.py    +0 −37
    official/nlp/modeling/models/seq2seq_transformer.py       +8 −3
official/modeling/activations/attention_initializer.py (deleted, 100644 → 0)

# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Attention Layer Initializer."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import tensorflow as tf


@tf.keras.utils.register_keras_serializable(package='Text')
def attention_initializer(hidden_size):
  """Weight initializer of the attention layer in the Seq2Seq Transformer.

  Args:
    hidden_size: hidden size of the input tensor.

  Returns:
    Initialized weights based on hidden size.
  """
  limit = math.sqrt(6.0 / (hidden_size + hidden_size))
  return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)
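
A note on the formula (not part of the diff): the limit math.sqrt(6.0 / (hidden_size + hidden_size)) is the Glorot/Xavier-uniform bound sqrt(6 / (fan_in + fan_out)) for a square hidden_size × hidden_size kernel, so for that shape the helper behaves like tf.keras.initializers.GlorotUniform. A minimal sketch, with hidden_size = 512 chosen purely for illustration:

import math

import tensorflow as tf

hidden_size = 512  # illustrative value, not taken from the diff
limit = math.sqrt(6.0 / (hidden_size + hidden_size))  # = sqrt(3 / 512) ≈ 0.0765

custom = tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)
glorot = tf.keras.initializers.GlorotUniform()

# For a square (hidden_size, hidden_size) kernel, fan_in == fan_out == hidden_size,
# so both initializers sample from U(-limit, +limit) with the same limit.
w_custom = custom(shape=(hidden_size, hidden_size))
w_glorot = glorot(shape=(hidden_size, hidden_size))

The difference is only that the helper fixes the bound from hidden_size up front, independent of the actual kernel shape it is later applied to.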
official/nlp/modeling/models/seq2seq_transformer.py (+8 −3)

@@ -21,9 +21,10 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import math
+
 import tensorflow as tf
 from official.modeling import tf_utils
-from official.modeling.activations import attention_initializer
 from official.nlp.modeling import layers
 from official.nlp.modeling.layers import position_embedding
 from official.nlp.modeling.layers import transformer

@@ -408,7 +409,7 @@ class TransformerEncoder(tf.keras.layers.Layer):
               norm_first=self._norm_first,
               norm_epsilon=self._norm_epsilon,
               intermediate_dropout=self._intermediate_dropout,
-              attention_initializer=attention_initializer.attention_initializer(
+              attention_initializer=attention_initializer(
                   input_shape[2]),
               name=("layer_%d" % i)))
     self.output_normalization = tf.keras.layers.LayerNormalization(

@@ -522,7 +523,7 @@ class TransformerDecoder(tf.keras.layers.Layer):
               norm_first=self._norm_first,
               norm_epsilon=self._norm_epsilon,
               intermediate_dropout=self._intermediate_dropout,
-              attention_initializer=attention_initializer.attention_initializer(
+              attention_initializer=attention_initializer(
                   input_shape[2]),
               name=("layer_%d" % i)))
     self.output_normalization = tf.keras.layers.LayerNormalization(

@@ -613,3 +614,7 @@ def embedding_linear(embedding_matrix, x):
   logits = tf.matmul(x, embedding_matrix, transpose_b=True)
   return tf.reshape(logits, [batch_size, length, vocab_size])

+
+def attention_initializer(hidden_size):
+  limit = math.sqrt(6.0 / (hidden_size + hidden_size))
+  return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)
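
The hunks above move the helper into seq2seq_transformer.py itself and pass its result to each encoder and decoder layer through an attention_initializer constructor argument, with the hidden size read from input_shape[2] at build time. A minimal, self-contained sketch of that pattern under stated assumptions: the AttentionProjection layer below is hypothetical and only stands in for the repository's transformer layers, which are not shown here.

import math

import tensorflow as tf


def attention_initializer(hidden_size):
  # Same helper as in the diff: uniform limit sqrt(6 / (fan_in + fan_out))
  # with fan_in == fan_out == hidden_size.
  limit = math.sqrt(6.0 / (hidden_size + hidden_size))
  return tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit)


class AttentionProjection(tf.keras.layers.Layer):
  """Hypothetical layer that accepts an `attention_initializer` argument."""

  def __init__(self, hidden_size, attention_initializer=None, **kwargs):
    super().__init__(**kwargs)
    self._hidden_size = hidden_size
    self._attention_initializer = attention_initializer or "glorot_uniform"

  def build(self, input_shape):
    # The initializer is applied to the projection kernel when it is created.
    self._projection = tf.keras.layers.Dense(
        self._hidden_size, kernel_initializer=self._attention_initializer)
    super().build(input_shape)

  def call(self, inputs):
    return self._projection(inputs)


# Mirrors the pattern in the diff: the hidden size comes from the third
# dimension of the input shape when the enclosing model is built.
inputs = tf.zeros([2, 10, 64])  # [batch, length, hidden]
layer = AttentionProjection(
    hidden_size=64,
    attention_initializer=attention_initializer(inputs.shape[2]),
    name="layer_0")
outputs = layer(inputs)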