yt8m.py 7.85 KB
Newer Older
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
1
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Hye Yoon's avatar
Hye Yoon committed
2
3
4
5
6
7
8
9
10
11
12
13
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14

Hye Yoon's avatar
Hye Yoon committed
15
"""Video classification configuration definition."""
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
16
import dataclasses
Hye Yoon's avatar
Hye Yoon committed
17
18
from typing import Optional, Tuple
from absl import flags
19

Hye Yoon's avatar
Hye Yoon committed
20
21
from official.core import config_definitions as cfg
from official.core import exp_factory
22
23
from official.modeling import hyperparams
from official.modeling import optimization
Yeqing Li's avatar
Yeqing Li committed
24
from official.vision.configs import common
Hye Yoon's avatar
Hye Yoon committed
25
26
27
28
29
30
31
32
33
34

# Module-level flags handle. NOTE(review): not referenced elsewhere in this
# file — presumably kept for importers; confirm before removing.
FLAGS = flags.FLAGS

# Dataset cardinalities, used by `add_trainer` to derive train/eval steps.
YT8M_TRAIN_EXAMPLES = 3888919
YT8M_VAL_EXAMPLES = 1112356
# GCS path version prefixes:
# 2/frame -> frame level
# 3/frame -> segment level
YT8M_TRAIN_PATH = 'gs://youtube8m-ml/2/frame/train/train*.tfrecord'
YT8M_VAL_PATH = 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord'

35

Hye Yoon's avatar
Hye Yoon committed
36
37
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building datasets.

  Attributes:
    name: Dataset name.
    split: Dataset split, 'train' or 'valid'.
    feature_sizes: Shape (length) of each feature specified in the
      feature_names.
    feature_names: Names of the features in the tf.SequenceExample.
    feature_sources: Whether the feature comes from 'context' or 'features'.
    feature_dtypes: Dtype of each decoded feature.
    feature_from_bytes: Decode feature from bytes or as dtype list.
    segment_size: Number of frames in each segment.
    segment_labels: Use segment-level label. Default: False, video-level label.
    include_video_id: `True` means include video id (string) in the input to
      the model.
    temporal_stride: Not used. To be deprecated.
    max_frames: Maximum number of frames in an input example. It is used to
      crop the input in the temporal dimension.
    num_frames: Number of frames in a single input example.
    num_classes: Number of classes to classify. Assuming it is a classification
      task.
    num_devices: Not used. To be deprecated.
    input_path: The path to the input.
    is_training: Whether this data is used for training or not.
    num_examples: Number of examples in the dataset. It is used to compute the
      steps for train or eval. Set the value to `-1` to make the experiment run
      until the end of dataset.
    file_type: Type of input files.
  """
  name: Optional[str] = 'yt8m'
  split: Optional[str] = None
  feature_sizes: Tuple[int, ...] = (1024, 128)
  feature_names: Tuple[str, ...] = ('rgb', 'audio')
  feature_sources: Tuple[str, ...] = ('feature', 'feature')
  feature_dtypes: Tuple[str, ...] = ('uint8', 'uint8')
  feature_from_bytes: Tuple[bool, ...] = (True, True)
  segment_size: int = 1
  segment_labels: bool = False
  include_video_id: bool = False
  temporal_stride: int = 1
  max_frames: int = 300
  num_frames: int = 300  # set smaller to allow random sample (Parser)
  num_classes: int = 3862
  num_devices: int = 1
  input_path: str = ''
  is_training: bool = True
  num_examples: int = -1
  file_type: str = 'tfrecord'
Hye Yoon's avatar
Hye Yoon committed
85
86
87


def yt8m(is_training):
  """Builds the YT8M dataset config for the requested split.

  Args:
    is_training: If True, configure the frame-level training split; otherwise
      the segment-level validation split.

  Returns:
    A `DataConfig` for the chosen split.
  """
  # Bind the three split-dependent values together instead of repeating the
  # conditional per keyword argument.
  if is_training:
    split = 'train'
    num_examples = YT8M_TRAIN_EXAMPLES
    input_path = YT8M_TRAIN_PATH
  else:
    split = 'valid'
    num_examples = YT8M_VAL_EXAMPLES
    input_path = YT8M_VAL_PATH
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
      segment_labels=False,
      segment_size=5,
      is_training=is_training,
      split=split,
      num_examples=num_examples,
      input_path=input_path)
Hye Yoon's avatar
Hye Yoon committed
98
99
100


@dataclasses.dataclass
class MoeModel(hyperparams.Config):
  """The mixture-of-experts aggregation model config.

  Attributes:
    num_mixtures: Number of mixtures used by the model.
    l2_penalty: L2 regularization penalty applied by the model.
    use_input_context_gate: Whether to apply a context gate on the input.
    use_output_context_gate: Whether to apply a context gate on the output.
  """
  num_mixtures: int = 5
  l2_penalty: float = 1e-5
  use_input_context_gate: bool = False
  use_output_context_gate: bool = False


@dataclasses.dataclass
class DbofModel(hyperparams.Config):
  """The Deep-Bag-of-Frames model config.

  Attributes:
    cluster_size: Size of the cluster (projection) layer.
    hidden_size: Size of the hidden layer.
    add_batch_norm: Whether to apply batch normalization.
    sample_random_frames: Whether to sample frames randomly.
    use_context_gate_cluster_layer: Whether to apply a context gate on the
      cluster layer.
    context_gate_cluster_bottleneck_size: Bottleneck size of the cluster-layer
      context gate; 0 means no bottleneck.
    pooling_method: Frame pooling method, e.g. 'average'.
    yt8m_agg_classifier_model: Name of the aggregation classifier model.
    agg_model: Config of the aggregation classifier model.
    norm_activation: Normalization and activation config.
  """
  cluster_size: int = 3000
  hidden_size: int = 2000
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  use_context_gate_cluster_layer: bool = False
  context_gate_cluster_bottleneck_size: int = 0
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'
  # Mutable config defaults must be constructed per instance via
  # `default_factory`: a class-level instance would be shared (mutations leak
  # across configs) and is rejected by `dataclasses` on Python 3.11+.
  agg_model: hyperparams.Config = dataclasses.field(default_factory=MoeModel)
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=lambda: common.NormActivation(
          activation='relu', use_sync_bn=False))
123

Hye Yoon's avatar
Hye Yoon committed
124
125
126
127
128
129

@dataclasses.dataclass
class Losses(hyperparams.Config):
  """The loss config.

  Attributes:
    name: Name of the loss, default binary cross-entropy.
    from_logits: Whether model outputs are logits (True) or probabilities.
    label_smoothing: Label smoothing factor; 0.0 disables smoothing.
    l2_weight_decay: L2 weight decay coefficient.
  """
  name: str = 'binary_crossentropy'
  from_logits: bool = False
  label_smoothing: float = 0.0
  l2_weight_decay: float = 1e-5
Hye Yoon's avatar
Hye Yoon committed
131

132

Hye Yoon's avatar
Hye Yoon committed
133
134
135
@dataclasses.dataclass
class YT8MTask(cfg.TaskConfig):
  """The YT8M classification task config.

  Attributes:
    model: The DBoF model config.
    train_data: Training data config.
    validation_data: Validation data config.
    losses: Loss config.
    gradient_clip_norm: Global-norm gradient clipping value.
    num_readers: Number of parallel input readers.
    top_k: K for top-k metrics/predictions.
    top_n: Optional N limit; `None` means no limit.
  """
  # Mutable config defaults are built per instance via `default_factory`:
  # a single class-level instance would be shared across every task config
  # (and dataclasses rejects mutable defaults on Python 3.11+).
  model: DbofModel = dataclasses.field(default_factory=DbofModel)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  gradient_clip_norm: float = 1.0
  num_readers: int = 8
  top_k: int = 20
  top_n: Optional[int] = None


def add_trainer(
    experiment: cfg.ExperimentConfig,
    train_batch_size: int,
    eval_batch_size: int,
    learning_rate: float = 0.0001,
    train_epochs: int = 50,
) -> cfg.ExperimentConfig:
  """Add and config a trainer to the experiment config.

  Builds a `TrainerConfig` (Adam optimizer, exponential learning-rate decay
  with a 500-step linear warmup) sized from the YT8M dataset constants and
  attaches it to `experiment` in place.

  Args:
    experiment: The experiment config to mutate.
    train_batch_size: Global training batch size.
    eval_batch_size: Global evaluation batch size.
    learning_rate: Initial learning rate after warmup.
    train_epochs: Number of training epochs.

  Returns:
    The same `experiment` object, with trainer and batch sizes set.

  Raises:
    ValueError: If the module-level dataset-size constants are non-positive.
  """
  # Sanity checks on the module-level dataset sizes (the error messages show
  # the data configs for debugging context).
  if YT8M_TRAIN_EXAMPLES <= 0:
    raise ValueError('Wrong train dataset size {!r}'.format(
        experiment.task.train_data))
  if YT8M_VAL_EXAMPLES <= 0:
    raise ValueError('Wrong validation dataset size {!r}'.format(
        experiment.task.validation_data))
  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size
  steps_per_epoch = YT8M_TRAIN_EXAMPLES // train_batch_size
  # One loop = 500 steps; summaries, checkpoints and validation all share
  # this interval.
  steps_per_loop = 500
  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_loop,
      summary_interval=steps_per_loop,
      checkpoint_interval=steps_per_loop,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=YT8M_VAL_EXAMPLES // eval_batch_size,
      validation_interval=steps_per_loop,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'adam',
              'adam': {}
          },
          'learning_rate': {
              'type': 'exponential',
              'exponential': {
                  'initial_learning_rate': learning_rate,
                  # Decay by 0.95 every 1.5 epochs, starting after the
                  # 500-step warmup offset.
                  'decay_rate': 0.95,
                  'decay_steps': int(steps_per_epoch * 1.5),
                  'offset': 500,
              }
          },
          'warmup': {
              'linear': {
                  'name': 'linear',
                  'warmup_learning_rate': 0,
                  'warmup_steps': 500,
              },
              'type': 'linear',
          }
      }))
  return experiment

196

Hye Yoon's avatar
Hye Yoon committed
197
198
199
200
@exp_factory.register_config_factory('yt8m_experiment')
def yt8m_experiment() -> cfg.ExperimentConfig:
  """Video classification general."""
  base_config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YT8MTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
          'task.train_data.feature_sizes != None',
          'task.train_data.feature_names != None',
          'task.train_data.feature_sources != None',
          'task.train_data.feature_dtypes != None',
      ])

  # Batch-size layout for a TPUv3 4x4 (32 cores, 16GB HBM per core).
  # `batch_factor` may be raised up to 25 to scale the per-core train batch.
  batch_factor = 1
  num_cores = 32
  train_global_bs = (32 * batch_factor) * num_cores
  eval_global_bs = (4 * 50) * num_cores  # eval multiplier<=100

  # Learning rate scales linearly from the 0.0001-at-bs-512 baseline.
  scaled_lr = 0.0001 * (train_global_bs / 512)
  return add_trainer(
      base_config,
      train_batch_size=train_global_bs,
      eval_batch_size=eval_global_bs,
      learning_rate=scaled_lr,
      train_epochs=100)