"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "3fa89ace18b6b60e7b843ffc555b39d410b88f7d"
Commit 60bb5067 authored by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 347135923
parent 68f301f7
...@@ -40,6 +40,8 @@ class DilatedResNet(hyperparams.Config): ...@@ -40,6 +40,8 @@ class DilatedResNet(hyperparams.Config):
multigrid: Optional[List[int]] = None multigrid: Optional[List[int]] = None
stem_type: str = 'v0' stem_type: str = 'v0'
last_stage_repeats: int = 1 last_stage_repeats: int = 1
se_ratio: float = 0.0
stochastic_depth_drop_rate: float = 0.0
@dataclasses.dataclass @dataclasses.dataclass
......
# Top1 accuracy 80.36% # Top-1 accuracy 81.6% on ImageNet
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16' mixed_precision_dtype: 'bfloat16'
...@@ -12,12 +12,14 @@ task: ...@@ -12,12 +12,14 @@ task:
model_id: 101 model_id: 101
output_stride: 16 output_stride: 16
stem_type: 'v1' stem_type: 'v1'
se_ratio: 0.25
stochastic_depth_drop_rate: 0.2
multigrid: [1, 2, 4] multigrid: [1, 2, 4]
last_stage_repeats: 1 last_stage_repeats: 1
norm_activation: norm_activation:
activation: 'swish' activation: 'swish'
losses: losses:
l2_weight_decay: 0.0001 l2_weight_decay: 0.00004
one_hot: true one_hot: true
label_smoothing: 0.1 label_smoothing: 0.1
train_data: train_data:
...@@ -25,6 +27,7 @@ task: ...@@ -25,6 +27,7 @@ task:
is_training: true is_training: true
global_batch_size: 4096 global_batch_size: 4096
dtype: 'bfloat16' dtype: 'bfloat16'
aug_policy: 'randaug'
validation_data: validation_data:
input_path: 'imagenet-2012-tfrecord/valid*' input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false is_training: false
...@@ -32,7 +35,7 @@ task: ...@@ -32,7 +35,7 @@ task:
dtype: 'bfloat16' dtype: 'bfloat16'
drop_remainder: false drop_remainder: false
trainer: trainer:
train_steps: 62400 train_steps: 109200
validation_steps: 13 validation_steps: 13
validation_interval: 312 validation_interval: 312
steps_per_loop: 312 steps_per_loop: 312
...@@ -47,7 +50,7 @@ trainer: ...@@ -47,7 +50,7 @@ trainer:
type: 'cosine' type: 'cosine'
cosine: cosine:
initial_learning_rate: 1.6 initial_learning_rate: 1.6
decay_steps: 62400 decay_steps: 109200
warmup: warmup:
type: 'linear' type: 'linear'
linear: linear:
......
...@@ -19,6 +19,7 @@ import tensorflow as tf ...@@ -19,6 +19,7 @@ import tensorflow as tf
from official.modeling import tf_utils from official.modeling import tf_utils
from official.vision.beta.modeling.backbones import factory from official.vision.beta.modeling.backbones import factory
from official.vision.beta.modeling.layers import nn_blocks from official.vision.beta.modeling.layers import nn_blocks
from official.vision.beta.modeling.layers import nn_layers
layers = tf.keras.layers layers = tf.keras.layers
...@@ -57,6 +58,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -57,6 +58,8 @@ class DilatedResNet(tf.keras.Model):
output_stride, output_stride,
input_specs=layers.InputSpec(shape=[None, None, None, 3]), input_specs=layers.InputSpec(shape=[None, None, None, 3]),
stem_type='v0', stem_type='v0',
se_ratio=None,
init_stochastic_depth_rate=0.0,
multigrid=None, multigrid=None,
last_stage_repeats=1, last_stage_repeats=1,
activation='relu', activation='relu',
...@@ -75,6 +78,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -75,6 +78,8 @@ class DilatedResNet(tf.keras.Model):
input_specs: `tf.keras.layers.InputSpec` specs of the input tensor. input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
stem_type: `standard` or `deeplab`, deeplab replaces 7x7 conv by 3 3x3 stem_type: `standard` or `deeplab`, deeplab replaces 7x7 conv by 3 3x3
convs. convs.
se_ratio: `float` or None. Ratio of the Squeeze-and-Excitation layer.
init_stochastic_depth_rate: `float` initial stochastic depth rate.
multigrid: `Tuple` of the same length as the number of blocks in the last multigrid: `Tuple` of the same length as the number of blocks in the last
resnet stage. resnet stage.
last_stage_repeats: `int`, how many times last stage is repeated. last_stage_repeats: `int`, how many times last stage is repeated.
...@@ -105,6 +110,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -105,6 +110,8 @@ class DilatedResNet(tf.keras.Model):
self._kernel_regularizer = kernel_regularizer self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
self._stem_type = stem_type self._stem_type = stem_type
self._se_ratio = se_ratio
self._init_stochastic_depth_rate = init_stochastic_depth_rate
if tf.keras.backend.image_data_format() == 'channels_last': if tf.keras.backend.image_data_format() == 'channels_last':
bn_axis = -1 bn_axis = -1
...@@ -193,6 +200,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -193,6 +200,8 @@ class DilatedResNet(tf.keras.Model):
dilation_rate=1, dilation_rate=1,
block_fn=block_fn, block_fn=block_fn,
block_repeats=spec[2], block_repeats=spec[2],
stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
name='block_group_l{}'.format(i + 2)) name='block_group_l{}'.format(i + 2))
endpoints[str(i + 2)] = x endpoints[str(i + 2)] = x
...@@ -210,6 +219,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -210,6 +219,8 @@ class DilatedResNet(tf.keras.Model):
dilation_rate=dilation_rate, dilation_rate=dilation_rate,
block_fn=block_fn, block_fn=block_fn,
block_repeats=spec[2], block_repeats=spec[2],
stochastic_depth_drop_rate=nn_layers.get_stochastic_depth_rate(
self._init_stochastic_depth_rate, i + 2, 4 + last_stage_repeats),
multigrid=multigrid if i >= 3 else None, multigrid=multigrid if i >= 3 else None,
name='block_group_l{}'.format(i + 2)) name='block_group_l{}'.format(i + 2))
dilation_rate *= 2 dilation_rate *= 2
...@@ -228,6 +239,7 @@ class DilatedResNet(tf.keras.Model): ...@@ -228,6 +239,7 @@ class DilatedResNet(tf.keras.Model):
dilation_rate, dilation_rate,
block_fn, block_fn,
block_repeats=1, block_repeats=1,
stochastic_depth_drop_rate=0.0,
multigrid=None, multigrid=None,
name='block_group'): name='block_group'):
"""Creates one group of blocks for the ResNet model. """Creates one group of blocks for the ResNet model.
...@@ -242,6 +254,7 @@ class DilatedResNet(tf.keras.Model): ...@@ -242,6 +254,7 @@ class DilatedResNet(tf.keras.Model):
dilation_rate: `int`, diluted convolution rates. dilation_rate: `int`, diluted convolution rates.
block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`. block_fn: Either `nn_blocks.ResidualBlock` or `nn_blocks.BottleneckBlock`.
block_repeats: `int` number of blocks contained in the layer. block_repeats: `int` number of blocks contained in the layer.
stochastic_depth_drop_rate: `float` drop rate of the current block group.
multigrid: List of ints or None, if specified, dilation rates for each multigrid: List of ints or None, if specified, dilation rates for each
block is scaled up by its corresponding factor in the multigrid. block is scaled up by its corresponding factor in the multigrid.
name: `str`name for the block. name: `str`name for the block.
...@@ -261,6 +274,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -261,6 +274,8 @@ class DilatedResNet(tf.keras.Model):
strides=strides, strides=strides,
dilation_rate=dilation_rate * multigrid[0], dilation_rate=dilation_rate * multigrid[0],
use_projection=True, use_projection=True,
stochastic_depth_drop_rate=stochastic_depth_drop_rate,
se_ratio=self._se_ratio,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer, kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
...@@ -275,6 +290,8 @@ class DilatedResNet(tf.keras.Model): ...@@ -275,6 +290,8 @@ class DilatedResNet(tf.keras.Model):
strides=1, strides=1,
dilation_rate=dilation_rate * multigrid[i], dilation_rate=dilation_rate * multigrid[i],
use_projection=False, use_projection=False,
stochastic_depth_drop_rate=stochastic_depth_drop_rate,
se_ratio=self._se_ratio,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer, kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
...@@ -290,6 +307,9 @@ class DilatedResNet(tf.keras.Model): ...@@ -290,6 +307,9 @@ class DilatedResNet(tf.keras.Model):
config_dict = { config_dict = {
'model_id': self._model_id, 'model_id': self._model_id,
'output_stride': self._output_stride, 'output_stride': self._output_stride,
'stem_type': self._stem_type,
'se_ratio': self._se_ratio,
'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
'activation': self._activation, 'activation': self._activation,
'use_sync_bn': self._use_sync_bn, 'use_sync_bn': self._use_sync_bn,
'norm_momentum': self._norm_momentum, 'norm_momentum': self._norm_momentum,
...@@ -326,9 +346,11 @@ def build_dilated_resnet( ...@@ -326,9 +346,11 @@ def build_dilated_resnet(
model_id=backbone_cfg.model_id, model_id=backbone_cfg.model_id,
output_stride=backbone_cfg.output_stride, output_stride=backbone_cfg.output_stride,
input_specs=input_specs, input_specs=input_specs,
stem_type=backbone_cfg.stem_type,
se_ratio=backbone_cfg.se_ratio,
init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
multigrid=backbone_cfg.multigrid, multigrid=backbone_cfg.multigrid,
last_stage_repeats=backbone_cfg.last_stage_repeats, last_stage_repeats=backbone_cfg.last_stage_repeats,
stem_type=backbone_cfg.stem_type,
activation=norm_activation_config.activation, activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn, use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum, norm_momentum=norm_activation_config.norm_momentum,
......
...@@ -48,6 +48,36 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -48,6 +48,36 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
512 * endpoint_filter_scale 512 * endpoint_filter_scale
], endpoints[str(int(np.math.log2(output_stride)))].shape.as_list()) ], endpoints[str(int(np.math.log2(output_stride)))].shape.as_list())
@parameterized.parameters(
    ('v0', None, 0.0),
    ('v1', None, 0.0),
    ('v1', 0.25, 0.0),
    ('v1', 0.25, 0.2),
)
def test_network_features(self, stem_type, se_ratio,
                          init_stochastic_depth_rate):
  """Tests DilatedResNet with optional SE and stochastic-depth features.

  Builds a dilated ResNet-50 backbone with the given stem type,
  Squeeze-and-Excitation ratio, and initial stochastic-depth rate, and
  checks that the endpoint at the output stride has the expected shape.

  Args:
    stem_type: `str`, stem variant passed to the backbone ('v0' or 'v1').
    se_ratio: `float` or None, Squeeze-and-Excitation ratio.
    init_stochastic_depth_rate: `float`, initial stochastic depth rate.
  """
  input_size = 128
  model_id = 50
  # Bottleneck blocks expand the last-stage filters by 4x.
  endpoint_filter_scale = 4
  output_stride = 8
  tf.keras.backend.set_image_data_format('channels_last')
  network = resnet_deeplab.DilatedResNet(
      model_id=model_id,
      output_stride=output_stride,
      stem_type=stem_type,
      se_ratio=se_ratio,
      init_stochastic_depth_rate=init_stochastic_depth_rate)
  inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
  endpoints = network(inputs)
  # Endpoint keys are level strings: level = log2(output_stride).
  # Note: np.log2 (not the removed np.math alias) keeps this compatible
  # with NumPy >= 1.25.
  self.assertAllEqual([
      1, input_size / output_stride, input_size / output_stride,
      512 * endpoint_filter_scale
  ], endpoints[str(int(np.log2(output_stride)))].shape.as_list())
@combinations.generate( @combinations.generate(
combinations.combine( combinations.combine(
strategy=[ strategy=[
...@@ -84,6 +114,9 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -84,6 +114,9 @@ class ResNetTest(parameterized.TestCase, tf.test.TestCase):
kwargs = dict( kwargs = dict(
model_id=50, model_id=50,
output_stride=8, output_stride=8,
stem_type='v0',
se_ratio=0.25,
init_stochastic_depth_rate=0.2,
use_sync_bn=False, use_sync_bn=False,
activation='relu', activation='relu',
norm_momentum=0.99, norm_momentum=0.99,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment