ModelZoo / ResNet50_tensorflow · Commit 68e59e15

Authored Nov 24, 2021 by Hongkun Yu; committed by A. Unique TensorFlower, Nov 24, 2021

Internal change

PiperOrigin-RevId: 412185559

Parent: 65407126
Showing 4 changed files with 16 additions and 314 deletions (+16, -314)
official/nlp/modeling/layers/__init__.py           +0   -1
official/nlp/modeling/layers/dense_einsum.py       +0   -180
official/nlp/modeling/layers/dense_einsum_test.py  +0   -119
official/nlp/transformer/attention_layer.py        +16  -14
official/nlp/modeling/layers/__init__.py (view file @ 68e59e15)
@@ -21,7 +21,6 @@ from official.nlp.modeling.layers.attention import *
 from official.nlp.modeling.layers.bigbird_attention import BigBirdAttention
 from official.nlp.modeling.layers.bigbird_attention import BigBirdMasks
 from official.nlp.modeling.layers.cls_head import *
-from official.nlp.modeling.layers.dense_einsum import DenseEinsum
 from official.nlp.modeling.layers.gated_feedforward import GatedFeedforward
 from official.nlp.modeling.layers.gaussian_process import RandomFeatureGaussianProcess
 from official.nlp.modeling.layers.kernel_attention import KernelAttention
official/nlp/modeling/layers/dense_einsum.py (deleted, 100644 → 0; view file @ 65407126)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Keras-based einsum layer."""
# pylint: disable=g-classes-have-attributes
import tensorflow as tf

from tensorflow.python.util import deprecation

_CHR_IDX = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"]


@tf.keras.utils.register_keras_serializable(package="Text")
class DenseEinsum(tf.keras.layers.Layer):
  """A densely connected layer that uses `tf.einsum` as the backing computation.

  This layer can perform einsum calculations of arbitrary dimensionality.

  Args:
    output_shape: Positive integer or tuple, dimensionality of the output space.
    num_summed_dimensions: The number of dimensions to sum over. Standard 2D
      matmul should use 1, 3D matmul should use 2, and so forth.
    activation: Activation function to use. If you don't specify anything, no
      activation is applied (ie. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation").
    kernel_constraint: Constraint function applied to the `kernel` weights
      matrix.
    bias_constraint: Constraint function applied to the bias vector.

  Input shape:
    N-D tensor with shape: `(batch_size, ..., input_dim)`. The most common
    situation would be a 2D input with shape `(batch_size, input_dim)`.

  Output shape:
    N-D tensor with shape: `(batch_size, ..., units)`. For instance, for a 2D
    input with shape `(batch_size, input_dim)`, the output would have shape
    `(batch_size, units)`.
  """

  @deprecation.deprecated(None, "DenseEinsum is deprecated. Please use "
                          "tf.keras.experimental.EinsumDense layer instead.")
  def __init__(self,
               output_shape,
               num_summed_dimensions=1,
               activation=None,
               use_bias=True,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    super(DenseEinsum, self).__init__(**kwargs)
    self._output_shape = output_shape if isinstance(
        output_shape, (list, tuple)) else (output_shape,)
    self._activation = tf.keras.activations.get(activation)
    self._use_bias = use_bias
    self._kernel_initializer = tf.keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf.keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
    self._kernel_constraint = tf.keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf.keras.constraints.get(bias_constraint)
    self._num_summed_dimensions = num_summed_dimensions
    self._einsum_string = None

  def _build_einsum_string(self, free_input_dims, bound_dims, output_dims):
    input_str = ""
    kernel_str = ""
    output_str = ""
    letter_offset = 0
    for i in range(free_input_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      output_str += char

    letter_offset += free_input_dims
    for i in range(bound_dims):
      char = _CHR_IDX[i + letter_offset]
      input_str += char
      kernel_str += char

    letter_offset += bound_dims
    for i in range(output_dims):
      char = _CHR_IDX[i + letter_offset]
      kernel_str += char
      output_str += char

    return input_str + "," + kernel_str + "->" + output_str

  def build(self, input_shape):
    input_shape = tf.TensorShape(input_shape)
    input_rank = input_shape.rank
    free_input_dims = input_rank - self._num_summed_dimensions
    output_dims = len(self._output_shape)

    self._einsum_string = self._build_einsum_string(free_input_dims,
                                                    self._num_summed_dimensions,
                                                    output_dims)

    # This is only saved for testing purposes.
    self._kernel_shape = (
        input_shape[free_input_dims:].concatenate(self._output_shape))

    self._kernel = self.add_weight(
        "kernel",
        shape=self._kernel_shape,
        initializer=self._kernel_initializer,
        regularizer=self._kernel_regularizer,
        constraint=self._kernel_constraint,
        dtype=self.dtype,
        trainable=True)
    if self._use_bias:
      self._bias = self.add_weight(
          "bias",
          shape=self._output_shape,
          initializer=self._bias_initializer,
          regularizer=self._bias_regularizer,
          constraint=self._bias_constraint,
          dtype=self.dtype,
          trainable=True)
    else:
      self._bias = None
    super(DenseEinsum, self).build(input_shape)

  def get_config(self):
    config = {
        "output_shape": self._output_shape,
        "num_summed_dimensions": self._num_summed_dimensions,
        "activation": tf.keras.activations.serialize(self._activation),
        "use_bias": self._use_bias,
        "kernel_initializer":
            tf.keras.initializers.serialize(self._kernel_initializer),
        "bias_initializer":
            tf.keras.initializers.serialize(self._bias_initializer),
        "kernel_regularizer":
            tf.keras.regularizers.serialize(self._kernel_regularizer),
        "bias_regularizer":
            tf.keras.regularizers.serialize(self._bias_regularizer),
        "activity_regularizer":
            tf.keras.regularizers.serialize(self._activity_regularizer),
        "kernel_constraint":
            tf.keras.constraints.serialize(self._kernel_constraint),
        "bias_constraint":
            tf.keras.constraints.serialize(self._bias_constraint)
    }
    base_config = super(DenseEinsum, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    ret = tf.einsum(self._einsum_string, inputs, self._kernel)
    if self._use_bias:
      ret += self._bias
    if self._activation is not None:
      ret = self._activation(ret)
    return ret
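The deprecation notice in this file points callers at `tf.keras.layers.experimental.EinsumDense` (written as `tf.keras.experimental.EinsumDense` in the message). As a rough illustration of that migration, here is a minimal sketch, not part of this commit, with made-up sizes (an 80-dim input projected to 64 dims, one summed dimension, no bias):

import tensorflow as tf

# Hypothetical example for illustration only.
# Old (deleted here): DenseEinsum(output_shape=64, num_summed_dimensions=1)
# would generate the einsum string "abc,cd->abd" for a rank-3 input.
layer = tf.keras.layers.experimental.EinsumDense(
    "abc,cd->abd",            # the equation is now written out explicitly
    output_shape=(None, 64),  # non-batch output dims; None = unknown length
    bias_axes=None)           # no bias, like DenseEinsum(use_bias=False)

x = tf.keras.Input(shape=(None, 80))
y = layer(x)                  # shape (batch, None, 64)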
official/nlp/modeling/layers/dense_einsum_test.py (deleted, 100644 → 0; view file @ 65407126)
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Keras-based einsum layer."""
import numpy as np
import tensorflow as tf

from tensorflow.python.keras import keras_parameterized  # pylint: disable=g-direct-tensorflow-import
from official.nlp.modeling.layers import dense_einsum


# This decorator runs the test in V1, V2-Eager, and V2-Functional mode. It
# guarantees forward compatibility of this code for the V2 switchover.
@keras_parameterized.run_all_keras_modes
class DenseEinsumLayer(keras_parameterized.TestCase):

  def test_3D_einsum_with_two_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=2)
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))

  def test_3D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64, 32), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cde->abde")
    self.assertEqual(test_layer._kernel_shape, (80, 64, 32))

  def test_2D_einsum_with_one_bound_dimensions(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,), num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_bias_term_can_be_disabled(self):
    # A layer created using the bias should have two weights.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=True)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(2, len(test_layer.get_weights()))

    # A layer created without the bias should have only one weight.
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, use_bias=False)
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(1, len(test_layer.get_weights()))

  def test_activation(self):
    # Create a model that does not use an activation.
    no_activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation=None)
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = no_activation_layer(input_tensor)
    no_activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Create a model that uses a softmax activation.
    activation_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1, activation="softmax")
    input_tensor = tf.keras.Input(shape=(None, 80))
    output_tensor = activation_layer(input_tensor)
    activation_model = tf.keras.Model(input_tensor, output_tensor)

    # Make sure the models' weights are identical.
    activation_model.set_weights(no_activation_model.get_weights())

    # Predict using each model on the same input data. The output should be
    # different, since one is using a softmax - even though the models'
    # weights are the same.
    input_values = 10 * np.random.random_sample((10, 4, 80))
    non_activated_data = no_activation_model.predict(input_values)
    activated_data = activation_model.predict(input_values)
    self.assertNotAllClose(activated_data, non_activated_data)

  def test_non_iterable_output_shape(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=64, num_summed_dimensions=1)
    # Create a 3-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abc,cd->abd")
    self.assertEqual(test_layer._kernel_shape, (80, 64))

  def test_with_explicit_initializer(self):
    test_layer = dense_einsum.DenseEinsum(
        output_shape=(64,),
        num_summed_dimensions=2,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02))
    # Create a 4-dimensional input (the first dimension is implicit).
    input_tensor = tf.keras.Input(shape=(None, 40, 80))
    _ = test_layer(input_tensor)
    self.assertEqual(test_layer._einsum_string, "abcd,cde->abe")
    self.assertEqual(test_layer._kernel_shape, (40, 80, 64))


if __name__ == "__main__":
  tf.test.main()
official/nlp/transformer/attention_layer.py (view file @ 68e59e15)
@@ -16,7 +16,6 @@
 import math
 
 import tensorflow as tf
 
-from official.nlp.modeling import layers
 
 class Attention(tf.keras.layers.Layer):

@@ -51,28 +50,31 @@ class Attention(tf.keras.layers.Layer):
     attention_initializer = _glorot_initializer(input_shape.as_list()[-1],
                                                 self.hidden_size)
-    self.query_dense_layer = layers.DenseEinsum(
-        output_shape=(self.num_heads, size_per_head),
+    self.query_dense_layer = tf.keras.layers.experimental.EinsumDense(
+        "BTE,ENH->BTNH",
+        output_shape=(None, self.num_heads, size_per_head),
         kernel_initializer=attention_initializer,
-        use_bias=False,
+        bias_axes=None,
         name="query")
-    self.key_dense_layer = layers.DenseEinsum(
-        output_shape=(self.num_heads, size_per_head),
+    self.key_dense_layer = tf.keras.layers.experimental.EinsumDense(
+        "BTE,ENH->BTNH",
+        output_shape=(None, self.num_heads, size_per_head),
         kernel_initializer=attention_initializer,
-        use_bias=False,
+        bias_axes=None,
         name="key")
-    self.value_dense_layer = layers.DenseEinsum(
-        output_shape=(self.num_heads, size_per_head),
+    self.value_dense_layer = tf.keras.layers.experimental.EinsumDense(
+        "BTE,ENH->BTNH",
+        output_shape=(None, self.num_heads, size_per_head),
        kernel_initializer=attention_initializer,
-        use_bias=False,
+        bias_axes=None,
         name="value")
     output_initializer = _glorot_initializer(self.hidden_size,
                                              self.hidden_size)
-    self.output_dense_layer = layers.DenseEinsum(
-        output_shape=self.hidden_size,
-        num_summed_dimensions=2,
+    self.output_dense_layer = tf.keras.layers.experimental.EinsumDense(
+        "BTNH,NHE->BTE",
+        output_shape=(None, self.hidden_size),
         kernel_initializer=output_initializer,
-        use_bias=False,
+        bias_axes=None,
         name="output_transform")
     super(Attention, self).build(input_shape)
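In the replacement above, the einsum equations spell out the head split that DenseEinsum previously inferred from `output_shape` and `num_summed_dimensions`: "BTE,ENH->BTNH" projects (batch, length, hidden) activations into per-head queries, keys and values, and "BTNH,NHE->BTE" folds the heads back into the hidden dimension. A small shape check, a sketch only, using assumed sizes (hidden_size=512, num_heads=8, size_per_head=64) that are not taken from this commit:

import tensorflow as tf

hidden_size, num_heads, size_per_head = 512, 8, 64  # assumed for illustration

query_dense = tf.keras.layers.experimental.EinsumDense(
    "BTE,ENH->BTNH",
    output_shape=(None, num_heads, size_per_head),
    bias_axes=None,
    name="query")
output_dense = tf.keras.layers.experimental.EinsumDense(
    "BTNH,NHE->BTE",
    output_shape=(None, hidden_size),
    bias_axes=None,
    name="output_transform")

x = tf.zeros([2, 10, hidden_size])  # (batch, length, hidden)
q = query_dense(x)                  # (2, 10, 8, 64): one projection per head
out = output_dense(q)               # (2, 10, 512): heads merged back
print(q.shape, out.shape)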