# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Video classification configuration definition."""
import dataclasses
from typing import Optional, Tuple
from absl import flags

from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling import optimization
from official.vision.configs import common

FLAGS = flags.FLAGS

YT8M_TRAIN_EXAMPLES = 3888919
YT8M_VAL_EXAMPLES = 1112356
# 2/frame -> frame level
# 3/frame -> segment level
YT8M_TRAIN_PATH = 'gs://youtube8m-ml/2/frame/train/train*.tfrecord'
YT8M_VAL_PATH = 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord'


@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building datasets.

  Attributes:
    name: Dataset name.
    split: Dataset split, 'train' or 'valid'.
    feature_sizes: Shape (length) of each feature specified in
      `feature_names`.
    feature_names: Names of the features in the tf.SequenceExample.
    segment_size: Number of frames in each segment.
    segment_labels: Whether to use segment-level labels. Default: False, i.e.
      video-level labels.
    include_video_id: If `True`, include the video id (string) in the input to
      the model.
    temporal_stride: Not used. To be deprecated.
    max_frames: Maximum number of frames in an input example. It is used to
      crop the input in the temporal dimension.
    num_frames: Number of frames in a single input example.
    num_classes: Number of classes to classify. Assuming it is a classification
      task.
    num_devices: Not used. To be deprecated.
    input_path: The path to the input.
    is_training: Whether this data is used for training or not.
    num_examples: Number of examples in the dataset. It is used to compute the
      steps for train or eval. Set the value to `-1` to make the experiment
      run until the end of the dataset.
  """
  name: Optional[str] = 'yt8m'
  split: Optional[str] = None
  feature_sizes: Tuple[int, ...] = (1024, 128)
  feature_names: Tuple[str, ...] = ('rgb', 'audio')
  segment_size: int = 1
  segment_labels: bool = False
  include_video_id: bool = False
  temporal_stride: int = 1
  max_frames: int = 300
  num_frames: int = 300  # Set smaller to allow random sampling in the parser.
  num_classes: int = 3862
  num_devices: int = 1
  input_path: str = ''
  is_training: bool = True
  num_examples: int = -1


def yt8m(is_training):
  """YT8M dataset configs."""
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
      segment_labels=False,
      segment_size=5,
      is_training=is_training,
      split='train' if is_training else 'valid',
      num_examples=YT8M_TRAIN_EXAMPLES if is_training else YT8M_VAL_EXAMPLES,
      input_path=YT8M_TRAIN_PATH if is_training else YT8M_VAL_PATH)
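
# A minimal usage sketch (illustration only, not part of the library API):
# `yt8m()` returns a `DataConfig` whose fields can be overridden afterwards,
# e.g. to point at a local copy of the TFRecords. The path and batch size
# below are hypothetical placeholders.
#
#   data_config = yt8m(is_training=True)
#   data_config.input_path = '/tmp/yt8m/train*.tfrecord'  # hypothetical path
#   data_config.global_batch_size = 64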


@dataclasses.dataclass
class MoeModel(hyperparams.Config):
  """The model config."""
  num_mixtures: int = 5
  l2_penalty: float = 1e-5
  use_input_context_gate: bool = False
  use_output_context_gate: bool = False


@dataclasses.dataclass
class DbofModel(hyperparams.Config):
  """The model config."""
  cluster_size: int = 3000
  hidden_size: int = 2000
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  use_context_gate_cluster_layer: bool = False
  context_gate_cluster_bottleneck_size: int = 0
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'
  agg_model: hyperparams.Config = MoeModel()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='relu', use_sync_bn=False)


@dataclasses.dataclass
class Losses(hyperparams.Config):
  """The loss config."""
  name: str = 'binary_crossentropy'
  from_logits: bool = False
  label_smoothing: float = 0.0
  l2_weight_decay: float = 1e-5


@dataclasses.dataclass
class YT8MTask(cfg.TaskConfig):
  """The task config."""
  model: DbofModel = DbofModel()
  train_data: DataConfig = yt8m(is_training=True)
  validation_data: DataConfig = yt8m(is_training=False)
  losses: Losses = Losses()
  gradient_clip_norm: float = 1.0
  num_readers: int = 8
  top_k: int = 20
  top_n: Optional[int] = None
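
# A minimal override sketch (illustration only): `YT8MTask` composes the DBoF
# model, the YT8M train/validation data, and the loss config; any field can
# be overridden like a regular `hyperparams.Config`. The values below are
# hypothetical, not recommended settings.
#
#   task = YT8MTask()
#   task.model.pooling_method = 'max'    # DBoF pooling, default is 'average'
#   task.losses.label_smoothing = 0.1    # hypothetical value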


def add_trainer(
    experiment: cfg.ExperimentConfig,
    train_batch_size: int,
    eval_batch_size: int,
    learning_rate: float = 0.0001,
    train_epochs: int = 50,
):
  """Add and config a trainer to the experiment config."""
  if YT8M_TRAIN_EXAMPLES <= 0:
    raise ValueError('Wrong train dataset size {!r}'.format(
        experiment.task.train_data))
  if YT8M_VAL_EXAMPLES <= 0:
    raise ValueError('Wrong validation dataset size {!r}'.format(
        experiment.task.validation_data))
  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size
  steps_per_epoch = YT8M_TRAIN_EXAMPLES // train_batch_size
  steps_per_loop = 30
  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_loop,
      summary_interval=steps_per_loop,
      checkpoint_interval=steps_per_loop,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=YT8M_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_loop,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'adam',
              'adam': {}
          },
          'learning_rate': {
              'type': 'exponential',
              'exponential': {
                  'initial_learning_rate': learning_rate,
                  'decay_rate': 0.95,
                  'decay_steps': int(steps_per_epoch * 1.5),
                  'offset': 500,
              }
          },
          'warmup': {
              'linear': {
                  'name': 'linear',
                  'warmup_learning_rate': 0,
                  'warmup_steps': 500,
              },
              'type': 'linear',
          }
      }))
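
  # A worked example (illustrative numbers, not from the original file): with
  # train_batch_size=1024, steps_per_epoch = 3888919 // 1024 = 3797, so the
  # default 50 epochs give train_steps = 50 * 3797 = 189850, the exponential
  # decay period is int(3797 * 1.5) = 5695 steps, and decay is offset by the
  # 500 warmup steps.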
  return experiment


@exp_factory.register_config_factory('yt8m_experiment')
def yt8m_experiment() -> cfg.ExperimentConfig:
  """Video classification general."""
  exp_config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YT8MTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
          'task.train_data.feature_sizes != None',
          'task.train_data.feature_names != None',
      ])

  # Per-core batch size for TPUv3 (16 GB HBM). `factor` in range(1, 26).
  factor = 1
  num_cores = 32  # for TPU 4x4
  train_per_core_bs = 32 * factor
  train_bs = train_per_core_bs * num_cores
  eval_per_core_bs = 32 * 50  # multiplier<=100
  eval_bs = eval_per_core_bs * num_cores
  # Base learning rate of 0.0001 for batch size 512; scaled linearly below.
  return add_trainer(
      exp_config,
      train_batch_size=train_bs,
      eval_batch_size=eval_bs,
      learning_rate=0.0001 * (train_bs / 512),
      train_epochs=100)
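

if __name__ == '__main__':
  # A minimal sketch (illustrative, not part of the original config file):
  # running this module directly executes the `yt8m_experiment` registration
  # above, so the config can be retrieved by name from the experiment factory.
  example_config = exp_factory.get_exp_config('yt8m_experiment')
  print(example_config.task.train_data.as_dict())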