yt8m.py 8.18 KB
Newer Older
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
1
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Hye Yoon's avatar
Hye Yoon committed
2
3
4
5
6
7
8
9
10
11
12
13
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14

Hye Yoon's avatar
Hye Yoon committed
15
"""Video classification configuration definition."""
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
16
import dataclasses
Hye Yoon's avatar
Hye Yoon committed
17
18
from typing import Optional, Tuple
from absl import flags
19

Hye Yoon's avatar
Hye Yoon committed
20
21
from official.core import config_definitions as cfg
from official.core import exp_factory
22
23
from official.modeling import hyperparams
from official.modeling import optimization
Yeqing Li's avatar
Yeqing Li committed
24
from official.vision.configs import common
Hye Yoon's avatar
Hye Yoon committed
25
26
27
28
29
30
31
32
33
34

FLAGS = flags.FLAGS

YT8M_TRAIN_EXAMPLES = 3888919
YT8M_VAL_EXAMPLES = 1112356
# 2/frame -> frame level
# 3/frame -> segment level
YT8M_TRAIN_PATH = 'gs://youtube8m-ml/2/frame/train/train*.tfrecord'
YT8M_VAL_PATH = 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord'

35

Hye Yoon's avatar
Hye Yoon committed
36
37
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building datasets.

  Attributes:
    name: Dataset name.
    split: dataset split, 'train' or 'valid'.
    feature_sizes: shape(length) of each feature specified in the feature_names.
    feature_names: names of the features in the tf.SequenceExample.
    feature_sources: if the feature from 'context' or 'features'.
    feature_dtypes: dtype of decoded feature.
    feature_from_bytes: decode feature from bytes or as dtype list.
    label_field: name of field to read from tf.SequenceExample.
    segment_size: Number of frames in each segment.
    segment_labels: Use segment level label. Default: False, video level label.
    include_video_id: `True` means include video id (string) in the input to
      the model.
    temporal_stride: Not used. To be deprecated.
    max_frames: Maximum number of frames in an input example. It is used to
      crop the input in the temporal dimension.
    num_frames: Number of frames in a single input example.
    num_classes: Number of classes to classify. Assuming it is a classification
      task.
    num_devices: Not used. To be deprecated.
    input_path: The path to the input.
    is_training: Whether this data is used for training or not.
    num_examples: Number of examples in the dataset. It is used to compute the
      steps for train or eval. Set the value to `-1` to make the experiment run
      until the end of dataset.
    file_type: type of input files.
  """
  name: Optional[str] = 'yt8m'
  split: Optional[str] = None
  feature_sizes: Tuple[int, ...] = (1024, 128)
  feature_names: Tuple[str, ...] = ('rgb', 'audio')
  feature_sources: Tuple[str, ...] = ('feature', 'feature')
  feature_dtypes: Tuple[str, ...] = ('uint8', 'uint8')
  feature_from_bytes: Tuple[bool, ...] = (True, True)
  label_field: str = 'labels'
  segment_size: int = 1
  segment_labels: bool = False
  include_video_id: bool = False
  temporal_stride: int = 1
  max_frames: int = 300
  num_frames: int = 300  # set smaller to allow random sample (Parser)
  num_classes: int = 3862
  num_devices: int = 1
  input_path: str = ''
  is_training: bool = True
  num_examples: int = -1
  file_type: str = 'tfrecord'
Hye Yoon's avatar
Hye Yoon committed
87
88
89


def yt8m(is_training):
  """YT8M dataset configs.

  Args:
    is_training: whether to build the training split (frame-level train set)
      or the validation split (segment-level validate set).

  Returns:
    A `DataConfig` for the requested split.
  """
  # Select the split-dependent settings first, then build the config once.
  if is_training:
    split_name = 'train'
    example_count = YT8M_TRAIN_EXAMPLES
    data_path = YT8M_TRAIN_PATH
  else:
    split_name = 'valid'
    example_count = YT8M_VAL_EXAMPLES
    data_path = YT8M_VAL_PATH
  # pylint: disable=unexpected-keyword-arg
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
      segment_labels=False,
      segment_size=5,
      is_training=is_training,
      split=split_name,
      drop_remainder=is_training,  # pytype: disable=wrong-keyword-args
      num_examples=example_count,
      input_path=data_path)
  # pylint: enable=unexpected-keyword-arg
Hye Yoon's avatar
Hye Yoon committed
103
104
105


@dataclasses.dataclass
class MoeModel(hyperparams.Config):
  """The model config.

  Configuration for the MoE (mixture-of-experts) aggregation classifier.

  Attributes:
    num_mixtures: Number of mixtures used by the classifier.
    l2_penalty: L2 regularization penalty coefficient.
    use_input_context_gate: Whether to apply a context gate on the input.
    use_output_context_gate: Whether to apply a context gate on the output.
  """
  num_mixtures: int = 5
  l2_penalty: float = 1e-5
  use_input_context_gate: bool = False
  use_output_context_gate: bool = False


@dataclasses.dataclass
class DbofModel(hyperparams.Config):
  """The model config.

  Configuration for the DBoF (deep bag-of-frames) model.

  Attributes:
    cluster_size: Size of the cluster (projection) layer.
    hidden_size: Size of the hidden layer.
    add_batch_norm: Whether to add batch normalization.
    sample_random_frames: Whether to sample frames randomly.
    use_context_gate_cluster_layer: Whether to use a context gate on the
      cluster layer.
    context_gate_cluster_bottleneck_size: Bottleneck size for the cluster
      context gate; 0 means no bottleneck.
    pooling_method: Frame pooling method name.
    yt8m_agg_classifier_model: Name of the aggregation classifier model.
    agg_model: Config of the aggregation classifier (default: `MoeModel`).
    norm_activation: Normalization and activation config.
  """
  cluster_size: int = 3000
  hidden_size: int = 2000
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  use_context_gate_cluster_layer: bool = False
  context_gate_cluster_bottleneck_size: int = 0
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'
  # Use default_factory so every DbofModel instance gets its own sub-config
  # objects. A class-level default instance (`= MoeModel()`) is shared by all
  # instances, so mutating one config would silently mutate them all.
  agg_model: hyperparams.Config = dataclasses.field(default_factory=MoeModel)
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=lambda: common.NormActivation(
          activation='relu', use_sync_bn=False))
128

Hye Yoon's avatar
Hye Yoon committed
129
130
131
132
133
134

@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss configuration.

  Attributes:
    name: Loss name, default 'binary_crossentropy'.
    from_logits: Whether model outputs are logits (True) or probabilities.
    label_smoothing: Label smoothing factor.
    l2_weight_decay: L2 weight decay coefficient.
  """
  name: str = 'binary_crossentropy'
  from_logits: bool = False
  label_smoothing: float = 0.0
  l2_weight_decay: float = 1e-5
Hye Yoon's avatar
Hye Yoon committed
136

137

Hye Yoon's avatar
Hye Yoon committed
138
139
140
@dataclasses.dataclass
class YT8MTask(cfg.TaskConfig):
  """The task config.

  Attributes:
    model: DBoF model config.
    train_data: Training data config.
    validation_data: Validation data config.
    losses: Loss config.
    gradient_clip_norm: Global norm used for gradient clipping.
    num_readers: Number of parallel input readers.
    top_k: Top-k value used in evaluation.
    top_n: Optional top-n value; None to disable.
  """
  # default_factory ensures each task instance owns fresh, independent
  # sub-configs. Class-level default instances (`= DbofModel()`) would be
  # shared by every YT8MTask, so an override in one experiment would leak
  # into all others.
  model: DbofModel = dataclasses.field(default_factory=DbofModel)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  gradient_clip_norm: float = 1.0
  num_readers: int = 8
  top_k: int = 20
  top_n: Optional[int] = None


def add_trainer(
    experiment: cfg.ExperimentConfig,
    train_batch_size: int,
    eval_batch_size: int,
    learning_rate: float = 0.0001,
    train_epochs: int = 50,
    num_train_examples: int = YT8M_TRAIN_EXAMPLES,
    num_val_examples: int = YT8M_VAL_EXAMPLES,
):
  """Add and config a trainer to the experiment config.

  Args:
    experiment: the experiment config to attach the trainer to (mutated in
      place).
    train_batch_size: global training batch size.
    eval_batch_size: global evaluation batch size.
    learning_rate: initial learning rate for the exponential schedule.
    train_epochs: number of training epochs.
    num_train_examples: number of training examples; must be positive.
    num_val_examples: number of validation examples; must be positive.

  Returns:
    The same `experiment` config, with trainer and batch sizes filled in.

  Raises:
    ValueError: if either dataset size is not positive.
  """
  # Fail fast on misconfigured dataset sizes.
  if num_train_examples <= 0:
    raise ValueError('Wrong train dataset size {!r}'.format(
        experiment.task.train_data))
  if num_val_examples <= 0:
    raise ValueError('Wrong validation dataset size {!r}'.format(
        experiment.task.validation_data))

  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size

  epoch_steps = num_train_examples // train_batch_size
  loop_steps = 500  # host loop length; also used as summary/ckpt/eval interval

  # Adam with an exponentially-decaying learning rate and linear warmup.
  optimizer_config = optimization.OptimizationConfig({
      'optimizer': {
          'type': 'adam',
          'adam': {}
      },
      'learning_rate': {
          'type': 'exponential',
          'exponential': {
              'initial_learning_rate': learning_rate,
              'decay_rate': 0.95,
              'decay_steps': int(epoch_steps * 1.5),
              'offset': 500,
          }
      },
      'warmup': {
          'linear': {
              'name': 'linear',
              'warmup_learning_rate': 0,
              'warmup_steps': 500,
          },
          'type': 'linear',
      }
  })

  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=loop_steps,
      summary_interval=loop_steps,
      checkpoint_interval=loop_steps,
      train_steps=train_epochs * epoch_steps,
      validation_steps=num_val_examples // eval_batch_size,
      validation_interval=loop_steps,
      optimizer_config=optimizer_config)
  return experiment

203

Hye Yoon's avatar
Hye Yoon committed
204
205
206
207
@exp_factory.register_config_factory('yt8m_experiment')
def yt8m_experiment() -> cfg.ExperimentConfig:
  """Video classification general."""
  # Config-override restrictions checked when the experiment is parsed.
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
      'task.train_data.num_classes == task.validation_data.num_classes',
      'task.train_data.feature_sizes != None',
      'task.train_data.feature_names != None',
      'task.train_data.feature_sources != None',
      'task.train_data.feature_dtypes != None',
  ]
  exp_config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YT8MTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=restrictions)

  # Per TPUv3 Core batch size 16GB HBM. `factor` in range(1, 26)
  factor = 1
  num_cores = 32  # for TPUv3 4x4
  per_core_train_bs = 32 * factor
  per_core_eval_bs = 4 * 50  # multiplier<=100
  train_bs = per_core_train_bs * num_cores
  eval_bs = per_core_eval_bs * num_cores

  # based lr=0.0001 for bs=512
  return add_trainer(
      exp_config,
      train_batch_size=train_bs,
      eval_batch_size=eval_bs,
      learning_rate=0.0001 * (train_bs / 512),
      train_epochs=100)