Internal change

PiperOrigin-RevId: 353697955

Internal change
PiperOrigin-RevId: 353697955
32dcc1e4 · Yin Cui · A. Unique TensorFlower · 056d83d2 · 32dcc1e4 · 32dcc1e4
Commit 32dcc1e4, authored Jan 25, 2021 by Yin Cui; committed by A. Unique TensorFlower on Jan 25, 2021.
3 changed files
--- a/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml
+++ b/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml
-# 3D ResNet-50 video classification on Kinetics-400. 75.3% top-1 and 91.2% top-5 accuracy.
+# 3D ResNet-50 video classification on Kinetics-400.
 #
 # --experiment_type=video_classification_kinetics400
-# Expected accuracy on TPU 8x8: 75.1%
+# Expected accuracy: 77.0% top-1, 93.0% top-5.
-# Updated: 2020-12-16
 runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
@@ -15,45 +14,59 @@ task:
      resnet_3d:
        block_specs: !!python/tuple
        - temporal_kernel_sizes: !!python/tuple
-          - 3
+          - 1
-          - 3
+          - 1
-          - 3
+          - 1
          temporal_strides: 1
-          use_self_gating: true
+          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
-          - 3
          - 1
-          - 3
+          - 1
+          - 1
          - 1
          temporal_strides: 1
-          use_self_gating: true
+          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
          - 3
-          - 1
          - 3
-          - 1
          - 3
-          - 1
+          - 3
+          - 3
+          - 3
          temporal_strides: 1
-          use_self_gating: true
+          use_self_gating: false
        - temporal_kernel_sizes: !!python/tuple
-          - 1
          - 3
-          - 1
+          - 3
+          - 3
          temporal_strides: 1
-          use_self_gating: true
+          use_self_gating: false
        model_id: 50
        stem_conv_temporal_kernel_size: 5
        stem_conv_temporal_stride: 2
-        stem_pool_temporal_stride: 2
+        stem_pool_temporal_stride: 1
  train_data:
    name: kinetics400
+    feature_shape: !!python/tuple
+    - 32
+    - 224
+    - 224
+    - 3
+    temporal_stride: 2
    global_batch_size: 1024
    dtype: 'bfloat16'
    shuffle_buffer_size: 1024
  validation_data:
    name: kinetics400
-    global_batch_size: 32
+    feature_shape: !!python/tuple
+    - 32
+    - 256
+    - 256
+    - 3
+    temporal_stride: 2
+    num_test_clips: 10
+    num_test_crops: 3
+    global_batch_size: 64
    dtype: 'bfloat16'
    drop_remainder: false
 trainer:
@@ -61,11 +74,11 @@ trainer:
    learning_rate:
      cosine:
        initial_learning_rate: 0.8
-        decay_steps: 42000
+        decay_steps: 42104
    warmup:
      linear:
-        warmup_steps: 1050
+        warmup_steps: 1053
-  train_steps: 42000
+  train_steps: 42104
  steps_per_loop: 500
  summary_interval: 500
  validation_interval: 500
--- a/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml
+++ b/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml
+# SlowOnly 16x4 video classification on Kinetics-400.
+#
+# --experiment_type=video_classification_kinetics400
+# Expected accuracy: 75.6% top-1, 92.1% top-5.
+runtime:
+  distribution_strategy: 'tpu'
+  mixed_precision_dtype: 'bfloat16'
+task:
+  model:
+    dropout_rate: 0.5
+    norm_activation:
+      use_sync_bn: false
+    backbone:
+      resnet_3d:
+        block_specs: !!python/tuple
+        - temporal_kernel_sizes: !!python/tuple
+          - 1
+          - 1
+          - 1
+          temporal_strides: 1
+          use_self_gating: false
+        - temporal_kernel_sizes: !!python/tuple
+          - 1
+          - 1
+          - 1
+          - 1
+          temporal_strides: 1
+          use_self_gating: false
+        - temporal_kernel_sizes: !!python/tuple
+          - 3
+          - 3
+          - 3
+          - 3
+          - 3
+          - 3
+          temporal_strides: 1
+          use_self_gating: false
+        - temporal_kernel_sizes: !!python/tuple
+          - 3
+          - 3
+          - 3
+          temporal_strides: 1
+          use_self_gating: false
+        model_id: 50
+        stem_conv_temporal_kernel_size: 1
+        stem_conv_temporal_stride: 1
+        stem_pool_temporal_stride: 1
+  train_data:
+    name: kinetics400
+    feature_shape: !!python/tuple
+    - 16
+    - 224
+    - 224
+    - 3
+    temporal_stride: 4
+    global_batch_size: 1024
+    dtype: 'bfloat16'
+    shuffle_buffer_size: 1024
+  validation_data:
+    name: kinetics400
+    feature_shape: !!python/tuple
+    - 16
+    - 256
+    - 256
+    - 3
+    temporal_stride: 4
+    num_test_clips: 10
+    num_test_crops: 3
+    global_batch_size: 64
+    dtype: 'bfloat16'
+    drop_remainder: false
+trainer:
+  optimizer_config:
+    learning_rate:
+      cosine:
+        initial_learning_rate: 0.8
+        decay_steps: 42104
+    warmup:
+      linear:
+        warmup_steps: 1053
+  train_steps: 42104
+  steps_per_loop: 500
+  summary_interval: 500
+  validation_interval: 500
--- a/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
+++ b/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml
-# SlowOnly video classification on Kinetics-400. Expected performance to be updated.
+# SlowOnly 8x8 video classification on Kinetics-400.
 #
 # --experiment_type=video_classification_kinetics400
 # Expected accuracy: 74.1% top-1, 91.4% top-5.