Commit dae05499 authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 336124157
parent 335331c2
...@@ -40,6 +40,7 @@ class EfficientNet(hyperparams.Config): ...@@ -40,6 +40,7 @@ class EfficientNet(hyperparams.Config):
class SpineNet(hyperparams.Config): class SpineNet(hyperparams.Config):
"""SpineNet config.""" """SpineNet config."""
model_id: str = '49' model_id: str = '49'
stochastic_depth_drop_rate: float = 0.0
@dataclasses.dataclass @dataclasses.dataclass
......
# RetinaNet with a SpineNet-143 backbone trained on COCO at 1280x1280,
# bfloat16 mixed precision on TPU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/instances_val2017.json'
  gradient_clip_norm: 0.0
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 4
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        # Initial rate for the linearly-scaled stochastic depth schedule.
        stochastic_depth_drop_rate: 0.2
        model_id: '143'
      type: 'spinenet'
    decoder:
      # SpineNet produces multi-scale features directly; no FPN decoder needed.
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1280, 1280, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/train*'
    is_training: true
    parser:
      aug_rand_hflip: true
      # Large-scale jittering range for multi-scale training.
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/val*'
    is_training: false
trainer:
  # 462 steps per loop/epoch; 231000 total steps = 500 epochs.
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        # 219450 / 462 = epoch 475; 226380 / 462 = epoch 490.
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
# RetinaNet with a SpineNet-190 backbone trained on COCO at 1280x1280,
# bfloat16 mixed precision on TPU. Uses a heavier head (7 convs, 512 filters)
# than the smaller SpineNet variants.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/instances_val2017.json'
  gradient_clip_norm: 0.0
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 4
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        # Initial rate for the linearly-scaled stochastic depth schedule.
        stochastic_depth_drop_rate: 0.2
        model_id: '190'
      type: 'spinenet'
    decoder:
      # SpineNet produces multi-scale features directly; no FPN decoder needed.
      type: 'identity'
    head:
      num_convs: 7
      num_filters: 512
    input_size: [1280, 1280, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/train*'
    is_training: true
    parser:
      aug_rand_hflip: true
      # Large-scale jittering range for multi-scale training.
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/val*'
    is_training: false
trainer:
  # 462 steps per loop/epoch; 231000 total steps = 500 epochs.
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        # 219450 / 462 = epoch 475; 226380 / 462 = epoch 490.
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
# RetinaNet with a SpineNet-49 backbone trained on COCO at 640x640,
# bfloat16 mixed precision on TPU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/instances_val2017.json'
  gradient_clip_norm: 0.0
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        # Initial rate for the linearly-scaled stochastic depth schedule.
        stochastic_depth_drop_rate: 0.2
        model_id: '49'
      type: 'spinenet'
    decoder:
      # SpineNet produces multi-scale features directly; no FPN decoder needed.
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [640, 640, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/train*'
    is_training: true
    parser:
      aug_rand_hflip: true
      # Large-scale jittering range for multi-scale training.
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/val*'
    is_training: false
trainer:
  # 462 steps per loop/epoch; 231000 total steps = 500 epochs.
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        # 219450 / 462 = epoch 475; 226380 / 462 = epoch 490.
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
# RetinaNet with a SpineNet-96 backbone trained on COCO at 1024x1024,
# bfloat16 mixed precision on TPU.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  annotation_file: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/instances_val2017.json'
  gradient_clip_norm: 0.0
  losses:
    l2_weight_decay: 4.0e-05
  model:
    anchor:
      anchor_size: 3
      aspect_ratios: [0.5, 1.0, 2.0]
      num_scales: 3
    backbone:
      spinenet:
        # Initial rate for the linearly-scaled stochastic depth schedule.
        stochastic_depth_drop_rate: 0.2
        model_id: '96'
      type: 'spinenet'
    decoder:
      # SpineNet produces multi-scale features directly; no FPN decoder needed.
      type: 'identity'
    head:
      num_convs: 4
      num_filters: 256
    input_size: [1024, 1024, 3]
    max_level: 7
    min_level: 3
    norm_activation:
      activation: 'swish'
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  train_data:
    dtype: 'bfloat16'
    global_batch_size: 256
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/train*'
    is_training: true
    parser:
      aug_rand_hflip: true
      # Large-scale jittering range for multi-scale training.
      aug_scale_max: 2.0
      aug_scale_min: 0.1
  validation_data:
    dtype: 'bfloat16'
    global_batch_size: 8
    input_path: '/readahead/200M/placer/prod/home/snaggletooth/test/data/coco/val*'
    is_training: false
trainer:
  # 462 steps per loop/epoch; 231000 total steps = 500 epochs.
  checkpoint_interval: 462
  optimizer_config:
    learning_rate:
      stepwise:
        # 219450 / 462 = epoch 475; 226380 / 462 = epoch 490.
        boundaries: [219450, 226380]
        values: [0.32, 0.032, 0.0032]
      type: 'stepwise'
    warmup:
      linear:
        warmup_learning_rate: 0.0067
        warmup_steps: 2000
  steps_per_loop: 462
  train_steps: 231000
  validation_interval: 462
  validation_steps: 625
...@@ -200,9 +200,9 @@ def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig: ...@@ -200,9 +200,9 @@ def retinanet_resnetfpn_coco() -> cfg.ExperimentConfig:
57 * steps_per_epoch, 67 * steps_per_epoch 57 * steps_per_epoch, 67 * steps_per_epoch
], ],
'values': [ 'values': [
0.28 * train_batch_size / 256.0, 0.32 * train_batch_size / 256.0,
0.028 * train_batch_size / 256.0, 0.032 * train_batch_size / 256.0,
0.0028 * train_batch_size / 256.0 0.0032 * train_batch_size / 256.0
], ],
} }
}, },
...@@ -238,11 +238,13 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig: ...@@ -238,11 +238,13 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
model=RetinaNet( model=RetinaNet(
backbone=backbones.Backbone( backbone=backbones.Backbone(
type='spinenet', type='spinenet',
spinenet=backbones.SpineNet(model_id='49')), spinenet=backbones.SpineNet(
model_id='49', stochastic_depth_drop_rate=0.2)),
decoder=decoders.Decoder( decoder=decoders.Decoder(
type='identity', identity=decoders.Identity()), type='identity', identity=decoders.Identity()),
anchor=Anchor(anchor_size=3), anchor=Anchor(anchor_size=3),
norm_activation=common.NormActivation(use_sync_bn=True), norm_activation=common.NormActivation(
use_sync_bn=True, activation='swish'),
num_classes=91, num_classes=91,
input_size=[input_size, input_size, 3], input_size=[input_size, input_size, 3],
min_level=3, min_level=3,
...@@ -253,13 +255,13 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig: ...@@ -253,13 +255,13 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
is_training=True, is_training=True,
global_batch_size=train_batch_size, global_batch_size=train_batch_size,
parser=Parser( parser=Parser(
aug_rand_hflip=True, aug_scale_min=0.5, aug_scale_max=2.0)), aug_rand_hflip=True, aug_scale_min=0.1, aug_scale_max=2.0)),
validation_data=DataConfig( validation_data=DataConfig(
input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'), input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
is_training=False, is_training=False,
global_batch_size=eval_batch_size)), global_batch_size=eval_batch_size)),
trainer=cfg.TrainerConfig( trainer=cfg.TrainerConfig(
train_steps=350 * steps_per_epoch, train_steps=500 * steps_per_epoch,
validation_steps=COCO_VAL_EXAMPLES // eval_batch_size, validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
validation_interval=steps_per_epoch, validation_interval=steps_per_epoch,
steps_per_loop=steps_per_epoch, steps_per_loop=steps_per_epoch,
...@@ -276,12 +278,12 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig: ...@@ -276,12 +278,12 @@ def retinanet_spinenet_coco() -> cfg.ExperimentConfig:
'type': 'stepwise', 'type': 'stepwise',
'stepwise': { 'stepwise': {
'boundaries': [ 'boundaries': [
320 * steps_per_epoch, 340 * steps_per_epoch 475 * steps_per_epoch, 490 * steps_per_epoch
], ],
'values': [ 'values': [
0.28 * train_batch_size / 256.0, 0.32 * train_batch_size / 256.0,
0.028 * train_batch_size / 256.0, 0.032 * train_batch_size / 256.0,
0.0028 * train_batch_size / 256.0 0.0032 * train_batch_size / 256.0
], ],
} }
}, },
......
...@@ -114,6 +114,17 @@ def build_block_specs(block_specs=None): ...@@ -114,6 +114,17 @@ def build_block_specs(block_specs=None):
return [BlockSpec(*b) for b in block_specs] return [BlockSpec(*b) for b in block_specs]
def get_stochastic_depth_rate(init_rate, i, n):
  """Get drop connect rate for the ith block.

  The rate grows linearly with block depth: block `i` (0-based) of `n`
  blocks receives `init_rate * (i + 1) / n`, so the deepest block uses
  the full `init_rate`.

  Args:
    init_rate: Initial drop rate in [0, 1], or None to disable
      stochastic depth.
    i: Zero-based index of the block.
    n: Total number of blocks.

  Returns:
    The drop rate for block `i`, or None if `init_rate` is None.

  Raises:
    ValueError: If `init_rate` is outside [0, 1].
  """
  if init_rate is None:
    return None
  if not 0 <= init_rate <= 1:
    raise ValueError('Initial drop rate must be within 0 and 1.')
  return init_rate * float(i + 1) / n
@tf.keras.utils.register_keras_serializable(package='Vision') @tf.keras.utils.register_keras_serializable(package='Vision')
class SpineNet(tf.keras.Model): class SpineNet(tf.keras.Model):
"""Class to build SpineNet models.""" """Class to build SpineNet models."""
...@@ -127,6 +138,7 @@ class SpineNet(tf.keras.Model): ...@@ -127,6 +138,7 @@ class SpineNet(tf.keras.Model):
resample_alpha=0.5, resample_alpha=0.5,
block_repeats=1, block_repeats=1,
filter_size_scale=1.0, filter_size_scale=1.0,
init_stochastic_depth_rate=0.0,
kernel_initializer='VarianceScaling', kernel_initializer='VarianceScaling',
kernel_regularizer=None, kernel_regularizer=None,
bias_regularizer=None, bias_regularizer=None,
...@@ -144,6 +156,7 @@ class SpineNet(tf.keras.Model): ...@@ -144,6 +156,7 @@ class SpineNet(tf.keras.Model):
self._resample_alpha = resample_alpha self._resample_alpha = resample_alpha
self._block_repeats = block_repeats self._block_repeats = block_repeats
self._filter_size_scale = filter_size_scale self._filter_size_scale = filter_size_scale
self._init_stochastic_depth_rate = init_stochastic_depth_rate
self._kernel_initializer = kernel_initializer self._kernel_initializer = kernel_initializer
self._kernel_regularizer = kernel_regularizer self._kernel_regularizer = kernel_regularizer
self._bias_regularizer = bias_regularizer self._bias_regularizer = bias_regularizer
...@@ -187,6 +200,7 @@ class SpineNet(tf.keras.Model): ...@@ -187,6 +200,7 @@ class SpineNet(tf.keras.Model):
strides, strides,
block_fn_cand, block_fn_cand,
block_repeats=1, block_repeats=1,
stochastic_depth_drop_rate=None,
name='block_group'): name='block_group'):
"""Creates one group of blocks for the SpineNet model.""" """Creates one group of blocks for the SpineNet model."""
block_fn_candidates = { block_fn_candidates = {
...@@ -205,6 +219,7 @@ class SpineNet(tf.keras.Model): ...@@ -205,6 +219,7 @@ class SpineNet(tf.keras.Model):
filters=filters, filters=filters,
strides=strides, strides=strides,
use_projection=use_projection, use_projection=use_projection,
stochastic_depth_drop_rate=stochastic_depth_drop_rate,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer, kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
...@@ -218,6 +233,7 @@ class SpineNet(tf.keras.Model): ...@@ -218,6 +233,7 @@ class SpineNet(tf.keras.Model):
filters=filters, filters=filters,
strides=1, strides=1,
use_projection=False, use_projection=False,
stochastic_depth_drop_rate=stochastic_depth_drop_rate,
kernel_initializer=self._kernel_initializer, kernel_initializer=self._kernel_initializer,
kernel_regularizer=self._kernel_regularizer, kernel_regularizer=self._kernel_regularizer,
bias_regularizer=self._bias_regularizer, bias_regularizer=self._bias_regularizer,
...@@ -334,6 +350,8 @@ class SpineNet(tf.keras.Model): ...@@ -334,6 +350,8 @@ class SpineNet(tf.keras.Model):
strides=1, strides=1,
block_fn_cand=target_block_fn, block_fn_cand=target_block_fn,
block_repeats=self._block_repeats, block_repeats=self._block_repeats,
stochastic_depth_drop_rate=get_stochastic_depth_rate(
self._init_stochastic_depth_rate, i, len(self._block_specs)),
name='scale_permuted_block_{}'.format(i + 1)) name='scale_permuted_block_{}'.format(i + 1))
net.append(x) net.append(x)
...@@ -459,6 +477,7 @@ class SpineNet(tf.keras.Model): ...@@ -459,6 +477,7 @@ class SpineNet(tf.keras.Model):
'resample_alpha': self._resample_alpha, 'resample_alpha': self._resample_alpha,
'block_repeats': self._block_repeats, 'block_repeats': self._block_repeats,
'filter_size_scale': self._filter_size_scale, 'filter_size_scale': self._filter_size_scale,
'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
'kernel_initializer': self._kernel_initializer, 'kernel_initializer': self._kernel_initializer,
'kernel_regularizer': self._kernel_regularizer, 'kernel_regularizer': self._kernel_regularizer,
'bias_regularizer': self._bias_regularizer, 'bias_regularizer': self._bias_regularizer,
...@@ -505,6 +524,7 @@ def build_spinenet( ...@@ -505,6 +524,7 @@ def build_spinenet(
resample_alpha=scaling_params['resample_alpha'], resample_alpha=scaling_params['resample_alpha'],
block_repeats=scaling_params['block_repeats'], block_repeats=scaling_params['block_repeats'],
filter_size_scale=scaling_params['filter_size_scale'], filter_size_scale=scaling_params['filter_size_scale'],
init_stochastic_depth_rate=backbone_cfg.stochastic_depth_drop_rate,
kernel_regularizer=l2_regularizer, kernel_regularizer=l2_regularizer,
activation=norm_activation_config.activation, activation=norm_activation_config.activation,
use_sync_bn=norm_activation_config.use_sync_bn, use_sync_bn=norm_activation_config.use_sync_bn,
......
...@@ -48,6 +48,7 @@ class SpineNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -48,6 +48,7 @@ class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
resample_alpha=resample_alpha, resample_alpha=resample_alpha,
block_repeats=block_repeats, block_repeats=block_repeats,
filter_size_scale=filter_size_scale, filter_size_scale=filter_size_scale,
init_stochastic_depth_rate=0.2,
) )
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1) inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
...@@ -68,6 +69,7 @@ class SpineNetTest(parameterized.TestCase, tf.test.TestCase): ...@@ -68,6 +69,7 @@ class SpineNetTest(parameterized.TestCase, tf.test.TestCase):
resample_alpha=0.5, resample_alpha=0.5,
block_repeats=1, block_repeats=1,
filter_size_scale=1.0, filter_size_scale=1.0,
init_stochastic_depth_rate=0.2,
use_sync_bn=False, use_sync_bn=False,
activation='relu', activation='relu',
norm_momentum=0.99, norm_momentum=0.99,
......
...@@ -133,8 +133,8 @@ class StochasticDepth(tf.keras.layers.Layer): ...@@ -133,8 +133,8 @@ class StochasticDepth(tf.keras.layers.Layer):
def call(self, inputs, training=None): def call(self, inputs, training=None):
if training is None: if training is None:
is_training = tf.keras.backend.learning_phase() training = tf.keras.backend.learning_phase()
if not is_training or self._drop_rate is None or self._drop_rate == 0: if not training or self._drop_rate is None or self._drop_rate == 0:
return inputs return inputs
keep_prob = 1.0 - self._drop_rate keep_prob = 1.0 - self._drop_rate
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment