Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
09d9656f
Unverified
Commit
09d9656f
authored
Jan 13, 2022
by
Srihari Humbarwadi
Committed by
GitHub
Jan 13, 2022
Browse files
Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling
parents
ac671306
49a5706c
Changes
427
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
190 additions
and
34 deletions
+190
-34
official/projects/movinet/modeling/movinet.py
official/projects/movinet/modeling/movinet.py
+8
-4
official/projects/movinet/modeling/movinet_layers.py
official/projects/movinet/modeling/movinet_layers.py
+3
-1
official/projects/movinet/modeling/movinet_layers_test.py
official/projects/movinet/modeling/movinet_layers_test.py
+4
-3
official/projects/movinet/modeling/movinet_model.py
official/projects/movinet/modeling/movinet_model.py
+2
-2
official/projects/movinet/modeling/movinet_model_test.py
official/projects/movinet/modeling/movinet_model_test.py
+2
-2
official/projects/movinet/modeling/movinet_test.py
official/projects/movinet/modeling/movinet_test.py
+1
-1
official/projects/movinet/movinet_tutorial.ipynb
official/projects/movinet/movinet_tutorial.ipynb
+4
-4
official/projects/movinet/requirements.txt
official/projects/movinet/requirements.txt
+0
-0
official/projects/movinet/tools/convert_3d_2plus1d.py
official/projects/movinet/tools/convert_3d_2plus1d.py
+7
-3
official/projects/movinet/tools/convert_3d_2plus1d_test.py
official/projects/movinet/tools/convert_3d_2plus1d_test.py
+5
-4
official/projects/movinet/train.py
official/projects/movinet/train.py
+4
-4
official/projects/movinet/train_test.py
official/projects/movinet/train_test.py
+1
-1
official/projects/nhnet/README.md
official/projects/nhnet/README.md
+1
-1
official/projects/nhnet/decoder.py
official/projects/nhnet/decoder.py
+2
-1
official/projects/nhnet/evaluation.py
official/projects/nhnet/evaluation.py
+2
-2
official/projects/nhnet/trainer.py
official/projects/nhnet/trainer.py
+1
-1
official/projects/roformer/README.md
official/projects/roformer/README.md
+14
-0
official/projects/roformer/__init__.py
official/projects/roformer/__init__.py
+15
-0
official/projects/roformer/experiments/roformer_base.yaml
official/projects/roformer/experiments/roformer_base.yaml
+61
-0
official/projects/roformer/roformer.py
official/projects/roformer/roformer.py
+53
-0
No files found.
official/
vision/beta/
projects/movinet/modeling/movinet.py
→
official/projects/movinet/modeling/movinet.py
View file @
09d9656f
...
@@ -24,8 +24,8 @@ from typing import Dict, Mapping, Optional, Sequence, Tuple, Union
...
@@ -24,8 +24,8 @@ from typing import Dict, Mapping, Optional, Sequence, Tuple, Union
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
hyperparams
from
official.modeling
import
hyperparams
from
official.projects.movinet.modeling
import
movinet_layers
from
official.vision.beta.modeling.backbones
import
factory
from
official.vision.beta.modeling.backbones
import
factory
from
official.vision.beta.projects.movinet.modeling
import
movinet_layers
# Defines a set of kernel sizes and stride sizes to simplify and shorten
# Defines a set of kernel sizes and stride sizes to simplify and shorten
# architecture definitions for configs below.
# architecture definitions for configs below.
...
@@ -474,7 +474,7 @@ class Movinet(tf.keras.Model):
...
@@ -474,7 +474,7 @@ class Movinet(tf.keras.Model):
if
not
(
len
(
block
.
expand_filters
)
==
len
(
block
.
kernel_sizes
)
==
if
not
(
len
(
block
.
expand_filters
)
==
len
(
block
.
kernel_sizes
)
==
len
(
block
.
strides
)):
len
(
block
.
strides
)):
raise
ValueError
(
raise
ValueError
(
'Lenths of block parameters differ: {}, {}, {}'
.
format
(
'Len
g
ths of block parameters differ: {}, {}, {}'
.
format
(
len
(
block
.
expand_filters
),
len
(
block
.
expand_filters
),
len
(
block
.
kernel_sizes
),
len
(
block
.
kernel_sizes
),
len
(
block
.
strides
)))
len
(
block
.
strides
)))
...
@@ -709,8 +709,12 @@ def build_movinet(
...
@@ -709,8 +709,12 @@ def build_movinet(
"""Builds MoViNet backbone from a config."""
"""Builds MoViNet backbone from a config."""
backbone_type
=
backbone_config
.
type
backbone_type
=
backbone_config
.
type
backbone_cfg
=
backbone_config
.
get
()
backbone_cfg
=
backbone_config
.
get
()
assert
backbone_type
==
'movinet'
,
(
'Inconsistent backbone type '
if
backbone_type
!=
'movinet'
:
f
'
{
backbone_type
}
'
)
raise
ValueError
(
f
'Inconsistent backbone type
{
backbone_type
}
'
)
if
norm_activation_config
.
activation
is
not
None
:
raise
ValueError
(
'norm_activation is not used in MoViNets, but specified: %s'
%
norm_activation_config
.
activation
)
return
Movinet
(
return
Movinet
(
model_id
=
backbone_cfg
.
model_id
,
model_id
=
backbone_cfg
.
model_id
,
...
...
official/
vision/beta/
projects/movinet/modeling/movinet_layers.py
→
official/projects/movinet/modeling/movinet_layers.py
View file @
09d9656f
...
@@ -22,6 +22,7 @@ from typing import Any, Mapping, Optional, Sequence, Tuple, Union
...
@@ -22,6 +22,7 @@ from typing import Any, Mapping, Optional, Sequence, Tuple, Union
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.vision.beta.modeling.layers
import
nn_layers
from
official.vision.beta.modeling.layers
import
nn_layers
# Default kernel weight decay that may be overridden
# Default kernel weight decay that may be overridden
...
@@ -323,7 +324,8 @@ class ConvBlock(tf.keras.layers.Layer):
...
@@ -323,7 +324,8 @@ class ConvBlock(tf.keras.layers.Layer):
self
.
_use_buffered_input
=
use_buffered_input
self
.
_use_buffered_input
=
use_buffered_input
if
activation
is
not
None
:
if
activation
is
not
None
:
self
.
_activation_layer
=
tf
.
keras
.
layers
.
Activation
(
activation
)
self
.
_activation_layer
=
tf_utils
.
get_activation
(
activation
,
use_keras_layer
=
True
)
else
:
else
:
self
.
_activation_layer
=
None
self
.
_activation_layer
=
None
...
...
official/
vision/beta/
projects/movinet/modeling/movinet_layers_test.py
→
official/projects/movinet/modeling/movinet_layers_test.py
View file @
09d9656f
...
@@ -18,8 +18,8 @@
...
@@ -18,8 +18,8 @@
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.projects.movinet.modeling
import
movinet_layers
from
official.vision.beta.modeling.layers
import
nn_layers
from
official.vision.beta.modeling.layers
import
nn_layers
from
official.vision.beta.projects.movinet.modeling
import
movinet_layers
class
MovinetLayersTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
class
MovinetLayersTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
...
@@ -338,7 +338,7 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -338,7 +338,7 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
predicted
=
tf
.
concat
(
predicted
,
axis
=
1
)
predicted
=
tf
.
concat
(
predicted
,
axis
=
1
)
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertEqual
(
predicted
.
shape
,
expected
.
shape
)
self
.
assertAllClose
(
predicted
,
expected
)
self
.
assertAllClose
(
predicted
,
expected
,
atol
=
1e-4
)
self
.
assertAllClose
(
self
.
assertAllClose
(
predicted
,
predicted
,
...
@@ -349,7 +349,8 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
...
@@ -349,7 +349,8 @@ class MovinetLayersTest(parameterized.TestCase, tf.test.TestCase):
[[[
3.
,
3.
,
3.
]],
[[[
3.
,
3.
,
3.
]],
[[
3.
,
3.
,
3.
]]],
[[
3.
,
3.
,
3.
]]],
[[[
4.
,
4.
,
4.
]],
[[[
4.
,
4.
,
4.
]],
[[
4.
,
4.
,
4.
]]]]])
[[
4.
,
4.
,
4.
]]]]],
atol
=
1e-4
)
def
test_stream_movinet_block
(
self
):
def
test_stream_movinet_block
(
self
):
block
=
movinet_layers
.
MovinetBlock
(
block
=
movinet_layers
.
MovinetBlock
(
...
...
official/
vision/beta/
projects/movinet/modeling/movinet_model.py
→
official/projects/movinet/modeling/movinet_model.py
View file @
09d9656f
...
@@ -21,10 +21,10 @@ from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Union
...
@@ -21,10 +21,10 @@ from typing import Any, Dict, Mapping, Optional, Sequence, Tuple, Union
from
absl
import
logging
from
absl
import
logging
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.projects.movinet.configs
import
movinet
as
cfg
from
official.projects.movinet.modeling
import
movinet_layers
from
official.vision.beta.modeling
import
backbones
from
official.vision.beta.modeling
import
backbones
from
official.vision.beta.modeling
import
factory_3d
as
model_factory
from
official.vision.beta.modeling
import
factory_3d
as
model_factory
from
official.vision.beta.projects.movinet.configs
import
movinet
as
cfg
from
official.vision.beta.projects.movinet.modeling
import
movinet_layers
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
@
tf
.
keras
.
utils
.
register_keras_serializable
(
package
=
'Vision'
)
...
...
official/
vision/beta/
projects/movinet/modeling/movinet_model_test.py
→
official/projects/movinet/modeling/movinet_model_test.py
View file @
09d9656f
...
@@ -19,8 +19,8 @@ from absl.testing import parameterized
...
@@ -19,8 +19,8 @@ from absl.testing import parameterized
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.
vision.beta.
projects.movinet.modeling
import
movinet
from
official.projects.movinet.modeling
import
movinet
from
official.
vision.beta.
projects.movinet.modeling
import
movinet_model
from
official.projects.movinet.modeling
import
movinet_model
class
MovinetModelTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
class
MovinetModelTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
...
...
official/
vision/beta/
projects/movinet/modeling/movinet_test.py
→
official/projects/movinet/modeling/movinet_test.py
View file @
09d9656f
...
@@ -18,7 +18,7 @@
...
@@ -18,7 +18,7 @@
from
absl.testing
import
parameterized
from
absl.testing
import
parameterized
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.
vision.beta.
projects.movinet.modeling
import
movinet
from
official.projects.movinet.modeling
import
movinet
class
MoViNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
class
MoViNetTest
(
parameterized
.
TestCase
,
tf
.
test
.
TestCase
):
...
...
official/
vision/beta/
projects/movinet/movinet_tutorial.ipynb
→
official/projects/movinet/movinet_tutorial.ipynb
View file @
09d9656f
...
@@ -66,10 +66,10 @@
...
@@ -66,10 +66,10 @@
"import tensorflow_hub as hub\n",
"import tensorflow_hub as hub\n",
"\n",
"\n",
"from official.vision.beta.configs import video_classification\n",
"from official.vision.beta.configs import video_classification\n",
"from official.
vision.beta.
projects.movinet.configs import movinet as movinet_configs\n",
"from official.projects.movinet.configs import movinet as movinet_configs\n",
"from official.
vision.beta.
projects.movinet.modeling import movinet\n",
"from official.projects.movinet.modeling import movinet\n",
"from official.
vision.beta.
projects.movinet.modeling import movinet_layers\n",
"from official.projects.movinet.modeling import movinet_layers\n",
"from official.
vision.beta.
projects.movinet.modeling import movinet_model"
"from official.projects.movinet.modeling import movinet_model"
]
]
},
},
{
{
...
...
official/
vision/beta/
projects/movinet/requirements.txt
→
official/projects/movinet/requirements.txt
View file @
09d9656f
File moved
official/
vision/beta/
projects/movinet/tools/convert_3d_2plus1d.py
→
official/projects/movinet/tools/convert_3d_2plus1d.py
View file @
09d9656f
...
@@ -18,8 +18,8 @@ from absl import app
...
@@ -18,8 +18,8 @@ from absl import app
from
absl
import
flags
from
absl
import
flags
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.
vision.beta.
projects.movinet.modeling
import
movinet
from
official.projects.movinet.modeling
import
movinet
from
official.
vision.beta.
projects.movinet.modeling
import
movinet_model
from
official.projects.movinet.modeling
import
movinet_model
flags
.
DEFINE_string
(
flags
.
DEFINE_string
(
'input_checkpoint_path'
,
None
,
'input_checkpoint_path'
,
None
,
...
@@ -29,8 +29,10 @@ flags.DEFINE_string(
...
@@ -29,8 +29,10 @@ flags.DEFINE_string(
'Export path to save the saved_model file.'
)
'Export path to save the saved_model file.'
)
flags
.
DEFINE_string
(
flags
.
DEFINE_string
(
'model_id'
,
'a0'
,
'MoViNet model name.'
)
'model_id'
,
'a0'
,
'MoViNet model name.'
)
flags
.
DEFINE_string
(
'se_type'
,
'2plus3d'
,
'MoViNet model SE type.'
)
flags
.
DEFINE_bool
(
flags
.
DEFINE_bool
(
'causal'
,
Fals
e
,
'Run the model in causal mode.'
)
'causal'
,
Tru
e
,
'Run the model in causal mode.'
)
flags
.
DEFINE_bool
(
flags
.
DEFINE_bool
(
'use_positional_encoding'
,
False
,
'use_positional_encoding'
,
False
,
'Whether to use positional encoding (only applied when causal=True).'
)
'Whether to use positional encoding (only applied when causal=True).'
)
...
@@ -47,6 +49,7 @@ def main(_) -> None:
...
@@ -47,6 +49,7 @@ def main(_) -> None:
model_id
=
FLAGS
.
model_id
,
model_id
=
FLAGS
.
model_id
,
causal
=
FLAGS
.
causal
,
causal
=
FLAGS
.
causal
,
conv_type
=
'2plus1d'
,
conv_type
=
'2plus1d'
,
se_type
=
FLAGS
.
se_type
,
use_positional_encoding
=
FLAGS
.
use_positional_encoding
)
use_positional_encoding
=
FLAGS
.
use_positional_encoding
)
model_2plus1d
=
movinet_model
.
MovinetClassifier
(
model_2plus1d
=
movinet_model
.
MovinetClassifier
(
backbone
=
backbone_2plus1d
,
backbone
=
backbone_2plus1d
,
...
@@ -57,6 +60,7 @@ def main(_) -> None:
...
@@ -57,6 +60,7 @@ def main(_) -> None:
model_id
=
FLAGS
.
model_id
,
model_id
=
FLAGS
.
model_id
,
causal
=
FLAGS
.
causal
,
causal
=
FLAGS
.
causal
,
conv_type
=
'3d_2plus1d'
,
conv_type
=
'3d_2plus1d'
,
se_type
=
FLAGS
.
se_type
,
use_positional_encoding
=
FLAGS
.
use_positional_encoding
)
use_positional_encoding
=
FLAGS
.
use_positional_encoding
)
model_3d_2plus1d
=
movinet_model
.
MovinetClassifier
(
model_3d_2plus1d
=
movinet_model
.
MovinetClassifier
(
backbone
=
backbone_3d_2plus1d
,
backbone
=
backbone_3d_2plus1d
,
...
...
official/
vision/beta/
projects/movinet/tools/convert_3d_2plus1d_test.py
→
official/projects/movinet/tools/convert_3d_2plus1d_test.py
View file @
09d9656f
...
@@ -19,9 +19,9 @@ import os
...
@@ -19,9 +19,9 @@ import os
from
absl
import
flags
from
absl
import
flags
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.
vision.beta.
projects.movinet.modeling
import
movinet
from
official.projects.movinet.modeling
import
movinet
from
official.
vision.beta.
projects.movinet.modeling
import
movinet_model
from
official.projects.movinet.modeling
import
movinet_model
from
official.
vision.beta.
projects.movinet.tools
import
convert_3d_2plus1d
from
official.projects.movinet.tools
import
convert_3d_2plus1d
FLAGS
=
flags
.
FLAGS
FLAGS
=
flags
.
FLAGS
...
@@ -36,7 +36,8 @@ class Convert3d2plus1dTest(tf.test.TestCase):
...
@@ -36,7 +36,8 @@ class Convert3d2plus1dTest(tf.test.TestCase):
model_3d_2plus1d
=
movinet_model
.
MovinetClassifier
(
model_3d_2plus1d
=
movinet_model
.
MovinetClassifier
(
backbone
=
movinet
.
Movinet
(
backbone
=
movinet
.
Movinet
(
model_id
=
'a0'
,
model_id
=
'a0'
,
conv_type
=
'3d_2plus1d'
),
conv_type
=
'3d_2plus1d'
,
se_type
=
'2plus3d'
),
num_classes
=
600
)
num_classes
=
600
)
model_3d_2plus1d
.
build
([
1
,
1
,
1
,
1
,
3
])
model_3d_2plus1d
.
build
([
1
,
1
,
1
,
1
,
3
])
save_checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
model_3d_2plus1d
)
save_checkpoint
=
tf
.
train
.
Checkpoint
(
model
=
model_3d_2plus1d
)
...
...
official/
vision/beta/
projects/movinet/train.py
→
official/projects/movinet/train.py
View file @
09d9656f
...
@@ -17,8 +17,8 @@ r"""Training driver.
...
@@ -17,8 +17,8 @@ r"""Training driver.
To train:
To train:
CONFIG_FILE=official/
vision/beta/
projects/movinet/configs/yaml/movinet_a0_k600_8x8.yaml
CONFIG_FILE=official/projects/movinet/configs/yaml/movinet_a0_k600_8x8.yaml
python3 official/
vision/beta/
projects/movinet/train.py \
python3 official/projects/movinet/train.py \
--experiment=movinet_kinetics600 \
--experiment=movinet_kinetics600 \
--mode=train \
--mode=train \
--model_dir=/tmp/movinet/ \
--model_dir=/tmp/movinet/ \
...
@@ -47,8 +47,8 @@ from official.modeling import performance
...
@@ -47,8 +47,8 @@ from official.modeling import performance
# model garden factory.
# model garden factory.
# pylint: disable=unused-import
# pylint: disable=unused-import
# the followings are the necessary imports.
# the followings are the necessary imports.
from
official.
vision.beta.
projects.movinet.modeling
import
movinet
from
official.projects.movinet.modeling
import
movinet
from
official.
vision.beta.
projects.movinet.modeling
import
movinet_model
from
official.projects.movinet.modeling
import
movinet_model
# pylint: enable=unused-import
# pylint: enable=unused-import
FLAGS
=
flags
.
FLAGS
FLAGS
=
flags
.
FLAGS
...
...
official/
vision/beta/
projects/movinet/train_test.py
→
official/projects/movinet/train_test.py
View file @
09d9656f
...
@@ -24,8 +24,8 @@ from absl import logging
...
@@ -24,8 +24,8 @@ from absl import logging
from
absl.testing
import
flagsaver
from
absl.testing
import
flagsaver
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.projects.movinet
import
train
as
train_lib
from
official.vision.beta.dataloaders
import
tfexample_utils
from
official.vision.beta.dataloaders
import
tfexample_utils
from
official.vision.beta.projects.movinet
import
train
as
train_lib
FLAGS
=
flags
.
FLAGS
FLAGS
=
flags
.
FLAGS
...
...
official/projects/nhnet/README.md
View file @
09d9656f
...
@@ -36,7 +36,7 @@ will crawl and extract news articles on a local machine.
...
@@ -36,7 +36,7 @@ will crawl and extract news articles on a local machine.
First, install the
`news-please`
CLI (requires python 3.x)
First, install the
`news-please`
CLI (requires python 3.x)
```
shell
```
shell
$
pip3
install
news-please
$
pip3
install
news-please
==
1.4.26
```
```
Next, run the crawler with our provided
[
config and URL list
](
https://github.com/google-research-datasets/NewSHead/releases
)
Next, run the crawler with our provided
[
config and URL list
](
https://github.com/google-research-datasets/NewSHead/releases
)
...
...
official/projects/nhnet/decoder.py
View file @
09d9656f
...
@@ -15,9 +15,10 @@
...
@@ -15,9 +15,10 @@
"""Transformer decoder that mimics a BERT encoder, to load BERT checkpoints."""
"""Transformer decoder that mimics a BERT encoder, to load BERT checkpoints."""
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.legacy.transformer
import
model_utils
as
transformer_utils
from
official.modeling
import
tf_utils
from
official.modeling
import
tf_utils
from
official.nlp.modeling
import
layers
from
official.nlp.modeling
import
layers
from
official.nlp.transformer
import
model_utils
as
transformer_utils
class
TransformerDecoder
(
tf
.
keras
.
layers
.
Layer
):
class
TransformerDecoder
(
tf
.
keras
.
layers
.
Layer
):
...
...
official/projects/nhnet/evaluation.py
View file @
09d9656f
...
@@ -22,8 +22,8 @@ from absl import logging
...
@@ -22,8 +22,8 @@ from absl import logging
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.
nlp
.transformer
import
metrics
as
metrics_v2
from
official.
legacy
.transformer
import
metrics
as
metrics_v2
from
official.
nlp
.transformer.utils
import
metrics
from
official.
legacy
.transformer.utils
import
metrics
from
official.projects.nhnet
import
input_pipeline
from
official.projects.nhnet
import
input_pipeline
from
official.projects.nhnet
import
models
from
official.projects.nhnet
import
models
...
...
official/projects/nhnet/trainer.py
View file @
09d9656f
...
@@ -25,8 +25,8 @@ from six.moves import zip
...
@@ -25,8 +25,8 @@ from six.moves import zip
import
tensorflow
as
tf
import
tensorflow
as
tf
from
official.common
import
distribute_utils
from
official.common
import
distribute_utils
from
official.legacy.transformer
import
metrics
as
transformer_metrics
from
official.modeling.hyperparams
import
params_dict
from
official.modeling.hyperparams
import
params_dict
from
official.nlp.transformer
import
metrics
as
transformer_metrics
from
official.projects.nhnet
import
evaluation
from
official.projects.nhnet
import
evaluation
from
official.projects.nhnet
import
input_pipeline
from
official.projects.nhnet
import
input_pipeline
from
official.projects.nhnet
import
models
from
official.projects.nhnet
import
models
...
...
official/projects/roformer/README.md
0 → 100644
View file @
09d9656f
Code for Roformer.
Run with
```
bash
DATA_PATH
=
???
OUTPUT_DIR
=
???
python3 train.py
\
--experiment
=
roformer/pretraining
\
--config_file
=
experiments/roformer_base.yaml
\
--params_override
=
"task.validation_data.input_path=
${
DATA_PATH
}
,runtime.distribution_strategy=tpu"
\
--tpu
=
local
\
--model_dir
=
${
OUTPUT_DIR
}
\
--mode
=
train_and_eval
```
official/projects/roformer/__init__.py
0 → 100644
View file @
09d9656f
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
official/projects/roformer/experiments/roformer_base.yaml
0 → 100644
View file @
09d9656f
task
:
init_checkpoint
:
'
'
model
:
cls_heads
:
[{
activation
:
tanh
,
cls_token_idx
:
0
,
dropout_rate
:
0.1
,
inner_dim
:
768
,
name
:
next_sentence
,
num_classes
:
2
}]
encoder
:
type
:
any
any
:
attention_dropout_rate
:
0.1
dropout_rate
:
0.1
embedding_size
:
768
hidden_activation
:
gelu
hidden_size
:
768
initializer_range
:
0.02
intermediate_size
:
3072
max_position_embeddings
:
512
num_attention_heads
:
12
num_layers
:
12
type_vocab_size
:
2
vocab_size
:
30522
train_data
:
drop_remainder
:
true
global_batch_size
:
256
input_path
:
'
'
is_training
:
true
max_predictions_per_seq
:
76
seq_length
:
512
use_next_sentence_label
:
true
use_position_id
:
false
validation_data
:
drop_remainder
:
false
global_batch_size
:
256
input_path
:
'
'
is_training
:
false
max_predictions_per_seq
:
76
seq_length
:
512
use_next_sentence_label
:
true
use_position_id
:
false
trainer
:
checkpoint_interval
:
20000
max_to_keep
:
5
optimizer_config
:
learning_rate
:
polynomial
:
cycle
:
false
decay_steps
:
1000000
end_learning_rate
:
0.0
initial_learning_rate
:
0.0001
power
:
1.0
type
:
polynomial
optimizer
:
type
:
adamw
warmup
:
polynomial
:
power
:
1
warmup_steps
:
10000
type
:
polynomial
steps_per_loop
:
50
summary_interval
:
50
train_steps
:
1000000
validation_interval
:
1000
validation_steps
:
64
official/projects/roformer/roformer.py
0 → 100644
View file @
09d9656f
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Roformer model configurations and instantiation methods."""
import
tensorflow
as
tf
from
official.modeling
import
tf_utils
from
official.modeling.hyperparams
import
base_config
from
official.nlp.configs
import
encoders
from
official.projects.roformer
import
roformer_encoder
class
RoformerEncoderConfig
(
encoders
.
BertEncoderConfig
):
pass
@
base_config
.
bind
(
RoformerEncoderConfig
)
def
get_encoder
(
encoder_cfg
:
RoformerEncoderConfig
):
"""Gets a 'RoformerEncoder' object.
Args:
encoder_cfg: A 'RoformerEncoderConfig'.
Returns:
A encoder object.
"""
return
roformer_encoder
.
RoformerEncoder
(
vocab_size
=
encoder_cfg
.
vocab_size
,
hidden_size
=
encoder_cfg
.
hidden_size
,
num_layers
=
encoder_cfg
.
num_layers
,
num_attention_heads
=
encoder_cfg
.
num_attention_heads
,
intermediate_size
=
encoder_cfg
.
intermediate_size
,
activation
=
tf_utils
.
get_activation
(
encoder_cfg
.
hidden_activation
),
dropout_rate
=
encoder_cfg
.
dropout_rate
,
attention_dropout_rate
=
encoder_cfg
.
attention_dropout_rate
,
max_sequence_length
=
encoder_cfg
.
max_position_embeddings
,
type_vocab_size
=
encoder_cfg
.
type_vocab_size
,
initializer
=
tf
.
keras
.
initializers
.
TruncatedNormal
(
stddev
=
encoder_cfg
.
initializer_range
),
output_range
=
encoder_cfg
.
output_range
,
embedding_width
=
encoder_cfg
.
embedding_size
,
norm_first
=
encoder_cfg
.
norm_first
)
Prev
1
…
9
10
11
12
13
14
15
16
17
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment