Commit 76df72b4 authored by Dan Kondratyuk, committed by A. Unique TensorFlower
Browse files

Improve MoViNet-Base and MoViNet-Stream training, and add a4 stream.

PiperOrigin-RevId: 374236760
parent 4f5ad013
# Video classification on Kinetics-600 using MoViNet-A0 backbone. # Video classification on Kinetics-600 using MoViNet-A0 backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves 71.65% Top-1 accuracy. # Achieves 72.28% Top-1 accuracy.
# http://mldash/experiments/4591693621833944103 # http://mldash/experiments/2112621422911359474
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
...@@ -17,7 +17,7 @@ task: ...@@ -17,7 +17,7 @@ task:
stochastic_depth_drop_rate: 0.2 stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
......
# Video classification on Kinetics-600 using MoViNet-A0-Stream backbone. # Video classification on Kinetics-600 using MoViNet-A0-Stream backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves 69.56% Top-1 accuracy. # Achieves 72.03% Top-1 accuracy.
# http://mldash/experiments/6696393165423234453 # http://mldash/experiments/7841061381580044300
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
...@@ -18,7 +18,7 @@ task: ...@@ -18,7 +18,7 @@ task:
stochastic_depth_drop_rate: 0.2 stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
...@@ -37,7 +37,6 @@ task: ...@@ -37,7 +37,6 @@ task:
aug_max_aspect_ratio: 2.0 aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08 aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5 aug_min_aspect_ratio: 0.5
aug_type: 'autoaug'
validation_data: validation_data:
name: kinetics600 name: kinetics600
feature_shape: !!python/tuple feature_shape: !!python/tuple
......
# Video classification on Kinetics-600 using MoViNet-A1 backbone. # Video classification on Kinetics-600 using MoViNet-A1 backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves 76.63% Top-1 accuracy. # Achieves 76.69% Top-1 accuracy.
# http://mldash/experiments/6004897086445740406 # http://mldash/experiments/6004897086445740406
runtime: runtime:
......
# Video classification on Kinetics-600 using MoViNet-A1-Stream backbone. # Video classification on Kinetics-600 using MoViNet-A1-Stream backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves x% Top-1 accuracy. # Achieves 76.45% Top-1 accuracy.
# http://mldash/experiments/ # http://mldash/experiments/2106053499367982379
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
...@@ -15,10 +15,10 @@ task: ...@@ -15,10 +15,10 @@ task:
movinet: movinet:
model_id: 'a1' model_id: 'a1'
causal: true causal: true
stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
stochastic_depth_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
...@@ -37,7 +37,6 @@ task: ...@@ -37,7 +37,6 @@ task:
aug_max_aspect_ratio: 2.0 aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08 aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5 aug_min_aspect_ratio: 0.5
aug_type: 'autoaug'
validation_data: validation_data:
name: kinetics600 name: kinetics600
feature_shape: !!python/tuple feature_shape: !!python/tuple
......
...@@ -15,10 +15,10 @@ task: ...@@ -15,10 +15,10 @@ task:
movinet: movinet:
model_id: 'a2' model_id: 'a2'
causal: true causal: true
stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
stochastic_depth_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
...@@ -37,7 +37,6 @@ task: ...@@ -37,7 +37,6 @@ task:
aug_max_aspect_ratio: 2.0 aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08 aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5 aug_min_aspect_ratio: 0.5
aug_type: 'autoaug'
validation_data: validation_data:
name: kinetics600 name: kinetics600
feature_shape: !!python/tuple feature_shape: !!python/tuple
......
# Video classification on Kinetics-600 using MoViNet-A3-Stream backbone. # Video classification on Kinetics-600 using MoViNet-A3-Stream backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves x% Top-1 accuracy. # Achieves 80.09% Top-1 accuracy.
# http://mldash/experiments/ # http://mldash/experiments/8515953265355959123
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
...@@ -14,10 +14,12 @@ task: ...@@ -14,10 +14,12 @@ task:
backbone: backbone:
movinet: movinet:
model_id: 'a3' model_id: 'a3'
causal: true
use_positional_encoding: true
stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.5
stochastic_depth_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
...@@ -36,7 +38,6 @@ task: ...@@ -36,7 +38,6 @@ task:
aug_max_aspect_ratio: 2.0 aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08 aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5 aug_min_aspect_ratio: 0.5
aug_type: 'autoaug'
validation_data: validation_data:
name: kinetics600 name: kinetics600
feature_shape: !!python/tuple feature_shape: !!python/tuple
......
# Video classification on Kinetics-600 using MoViNet-A4-Stream backbone.
# --experiment_type=movinet_kinetics600
# Achieves 81.33% Top-1 accuracy.
# http://mldash/experiments/3621454183108305685
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 0.00003
label_smoothing: 0.1
model:
backbone:
movinet:
model_id: 'a4'
causal: true
use_positional_encoding: true
stochastic_depth_drop_rate: 0.2
norm_activation:
use_sync_bn: true
dropout_rate: 0.5
train_data:
name: kinetics600
variant_name: rgb
feature_shape: !!python/tuple
- 32
- 290
- 290
- 3
temporal_stride: 3
random_stride_range: 1
global_batch_size: 1024
dtype: 'bfloat16'
shuffle_buffer_size: 1024
min_image_size: 320
aug_max_area_ratio: 1.0
aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5
aug_type: 'autoaug'
validation_data:
name: kinetics600
feature_shape: !!python/tuple
- 80
- 290
- 290
- 3
temporal_stride: 3
num_test_clips: 1
num_test_crops: 1
global_batch_size: 64
min_image_size: 320
dtype: 'bfloat16'
drop_remainder: false
trainer:
optimizer_config:
learning_rate:
cosine:
initial_learning_rate: 1.8
decay_steps: 85785
warmup:
linear:
warmup_steps: 2145
optimizer:
type: 'rmsprop'
rmsprop:
rho: 0.9
momentum: 0.9
epsilon: 1.0
clipnorm: 1.0
train_steps: 85785
steps_per_loop: 500
summary_interval: 500
validation_interval: 500
...@@ -36,7 +36,7 @@ task: ...@@ -36,7 +36,7 @@ task:
aug_max_aspect_ratio: 2.0 aug_max_aspect_ratio: 2.0
aug_min_area_ratio: 0.08 aug_min_area_ratio: 0.08
aug_min_aspect_ratio: 0.5 aug_min_aspect_ratio: 0.5
aug_type: 'randaug' aug_type: 'autoaug'
validation_data: validation_data:
name: kinetics600 name: kinetics600
feature_shape: !!python/tuple feature_shape: !!python/tuple
......
...@@ -17,7 +17,7 @@ task: ...@@ -17,7 +17,7 @@ task:
stochastic_depth_drop_rate: 0.2 stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
......
# Video classification on Kinetics-600 using MoViNet-T0-Stream backbone. # Video classification on Kinetics-600 using MoViNet-T0-Stream backbone.
# --experiment_type=movinet_kinetics600 # --experiment_type=movinet_kinetics600
# Achieves x% Top-1 accuracy. # Achieves 67.17% Top-1 accuracy.
# http://mldash/experiments/ # http://mldash/experiments/3540709722174127508
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
...@@ -14,10 +14,11 @@ task: ...@@ -14,10 +14,11 @@ task:
backbone: backbone:
movinet: movinet:
model_id: 't0' model_id: 't0'
causal: true
stochastic_depth_drop_rate: 0.2
norm_activation: norm_activation:
use_sync_bn: true use_sync_bn: true
dropout_rate: 0.5 dropout_rate: 0.2
stochastic_depth_rate: 0.2
train_data: train_data:
name: kinetics600 name: kinetics600
variant_name: rgb variant_name: rgb
......
...@@ -294,8 +294,8 @@ class ConvBlock(tf.keras.layers.Layer): ...@@ -294,8 +294,8 @@ class ConvBlock(tf.keras.layers.Layer):
own batch norm and activation. '3d_2plus1d' is like '2plus1d', but own batch norm and activation. '3d_2plus1d' is like '2plus1d', but
uses two sequential 3D ops instead. uses two sequential 3D ops instead.
use_positional_encoding: add a positional encoding before the temporal use_positional_encoding: add a positional encoding before the temporal
convolution. Assumes `use_2plus1d=True` and `kernel_size[0] > 1`. convolution. Assumes `kernel_size[0] > 1`. Otherwise, this argument
Otherwise, this argument is ignored. is ignored.
use_buffered_input: if True, the input is expected to be padded use_buffered_input: if True, the input is expected to be padded
beforehand. In effect, calling this layer will use 'valid' padding on beforehand. In effect, calling this layer will use 'valid' padding on
the temporal dimension to simulate 'causal' padding. the temporal dimension to simulate 'causal' padding.
...@@ -426,7 +426,7 @@ class ConvBlock(tf.keras.layers.Layer): ...@@ -426,7 +426,7 @@ class ConvBlock(tf.keras.layers.Layer):
use_buffered_input=self._use_buffered_input, use_buffered_input=self._use_buffered_input,
name='conv3d') name='conv3d')
if self._use_positional_encoding and self._conv_temporal is not None: if self._use_positional_encoding and self._kernel_size[0] > 1:
self._pos_encoding = nn_layers.PositionalEncoding() self._pos_encoding = nn_layers.PositionalEncoding()
else: else:
self._pos_encoding = None self._pos_encoding = None
...@@ -451,7 +451,7 @@ class ConvBlock(tf.keras.layers.Layer): ...@@ -451,7 +451,7 @@ class ConvBlock(tf.keras.layers.Layer):
"""Calls the layer with the given inputs.""" """Calls the layer with the given inputs."""
x = inputs x = inputs
if self._pos_encoding is not None: if self._pos_encoding is not None and self._conv_temporal is None:
x = self._pos_encoding(x) x = self._pos_encoding(x)
x = self._conv(x) x = self._conv(x)
......
...@@ -162,5 +162,7 @@ def build_movinet_model( ...@@ -162,5 +162,7 @@ def build_movinet_model(
num_classes=num_classes, num_classes=num_classes,
kernel_regularizer=l2_regularizer, kernel_regularizer=l2_regularizer,
input_specs=input_specs_dict, input_specs=input_specs_dict,
dropout_rate=model_config.dropout_rate) dropout_rate=model_config.dropout_rate,
output_states=model_config.output_states)
return model return model
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# Lint as: python3 # Lint as: python3
"""Tests for movinet_model.py.""" """Tests for movinet_model.py."""
# Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# Lint as: python3 # Lint as: python3
"""Tests for movinet.py.""" """Tests for movinet.py."""
# Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment