yt8m.py 8.03 KB
Newer Older
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
1
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
Hye Yoon's avatar
Hye Yoon committed
2
3
4
5
6
7
8
9
10
11
12
13
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14

Hye Yoon's avatar
Hye Yoon committed
15
"""Video classification configuration definition."""
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
16
import dataclasses
Hye Yoon's avatar
Hye Yoon committed
17
18
from typing import Optional, Tuple
from absl import flags
19

Hye Yoon's avatar
Hye Yoon committed
20
21
from official.core import config_definitions as cfg
from official.core import exp_factory
22
23
from official.modeling import hyperparams
from official.modeling import optimization
Yeqing Li's avatar
Yeqing Li committed
24
from official.vision.configs import common
Hye Yoon's avatar
Hye Yoon committed
25
26
27
28
29
30
31
32
33
34

# NOTE(review): FLAGS is not read anywhere in this file; presumably kept for
# importers that reference it — confirm before removing.
FLAGS = flags.FLAGS

# Dataset cardinalities, used by add_trainer() to derive step counts.
YT8M_TRAIN_EXAMPLES = 3888919
YT8M_VAL_EXAMPLES = 1112356
# 2/frame -> frame level
# 3/frame -> segment level
YT8M_TRAIN_PATH = 'gs://youtube8m-ml/2/frame/train/train*.tfrecord'
YT8M_VAL_PATH = 'gs://youtube8m-ml/3/frame/validate/validate*.tfrecord'

35

Hye Yoon's avatar
Hye Yoon committed
36
37
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """The base configuration for building datasets.

  Attributes:
    name: Dataset name.
    split: dataset split, 'train' or 'valid'.
    feature_sizes: shape(length) of each feature specified in the feature_names.
    feature_names: names of the features in the tf.SequenceExample.
    feature_sources: if the feature from 'context' or 'features'.
    feature_dtypes: dtype of decoded feature.
    feature_from_bytes: decode feature from bytes or as dtype list.
    label_field: name of field to read from tf.SequenceExample.
    segment_size: Number of frames in each segment.
    segment_labels: Use segment level label. Default: False, video level label.
    include_video_id: `True` means include video id (string) in the input to
      the model.
    temporal_stride: Not used. To be deprecated.
    max_frames: Maximum number of frames in an input example. It is used to
      crop the input in the temporal dimension.
    num_frames: Number of frames in a single input example.
    num_classes: Number of classes to classify. Assuming it is a classification
      task.
    num_devices: Not used. To be deprecated.
    input_path: The path to the input.
    is_training: Whether this data is used for training or not.
    num_examples: Number of examples in the dataset. It is used to compute the
      steps for train or eval. set the value to `-1` to make the experiment run
      until the end of dataset.
    file_type: type of input files.
  """
  name: Optional[str] = 'yt8m'
  split: Optional[str] = None
  feature_sizes: Tuple[int, ...] = (1024, 128)
  feature_names: Tuple[str, ...] = ('rgb', 'audio')
  feature_sources: Tuple[str, ...] = ('feature', 'feature')
  feature_dtypes: Tuple[str, ...] = ('uint8', 'uint8')
  feature_from_bytes: Tuple[bool, ...] = (True, True)
  label_field: str = 'labels'
  segment_size: int = 1
  segment_labels: bool = False
  include_video_id: bool = False
  temporal_stride: int = 1
  max_frames: int = 300
  num_frames: int = 300  # set smaller to allow random sample (Parser)
  num_classes: int = 3862
  num_devices: int = 1
  input_path: str = ''
  is_training: bool = True
  num_examples: int = -1
  file_type: str = 'tfrecord'
Hye Yoon's avatar
Hye Yoon committed
87
88
89


def yt8m(is_training):
  """YT8M dataset configs.

  Args:
    is_training: builds the training split config when True, otherwise the
      validation split config.

  Returns:
    A `DataConfig` pointing at the corresponding YT8M TFRecord shards.
  """
  if is_training:
    split = 'train'
    num_examples = YT8M_TRAIN_EXAMPLES
    input_path = YT8M_TRAIN_PATH
  else:
    split = 'valid'
    num_examples = YT8M_VAL_EXAMPLES
    input_path = YT8M_VAL_PATH
  return DataConfig(
      num_frames=30,
      temporal_stride=1,
      segment_labels=False,
      segment_size=5,
      is_training=is_training,
      split=split,
      num_examples=num_examples,
      input_path=input_path)
Hye Yoon's avatar
Hye Yoon committed
100
101
102


@dataclasses.dataclass
class MoeModel(hyperparams.Config):
  """The model config.

  Config for the mixture-of-experts (MoE) aggregation classifier; consumed by
  `DbofModel.agg_model` when `yt8m_agg_classifier_model` is 'MoeModel'.
  """
  # NOTE(review): presumably the number of expert mixtures per class — confirm
  # against the model implementation.
  num_mixtures: int = 5
  l2_penalty: float = 1e-5
  use_input_context_gate: bool = False
  use_output_context_gate: bool = False


@dataclasses.dataclass
class DbofModel(hyperparams.Config):
  """The model config.

  Deep Bag-of-Frames (DBoF) model configuration, including the aggregation
  classifier (`agg_model`) and normalization/activation settings.
  """
  cluster_size: int = 3000
  hidden_size: int = 2000
  add_batch_norm: bool = True
  sample_random_frames: bool = True
  use_context_gate_cluster_layer: bool = False
  context_gate_cluster_bottleneck_size: int = 0
  pooling_method: str = 'average'
  yt8m_agg_classifier_model: str = 'MoeModel'
  # Use `default_factory` instead of instance defaults: a plain `MoeModel()`
  # default is created once at class-definition time and shared by every
  # DbofModel instance (and mutable defaults are rejected by dataclasses on
  # newer Python versions). Defaults are unchanged.
  agg_model: hyperparams.Config = dataclasses.field(default_factory=MoeModel)
  norm_activation: common.NormActivation = dataclasses.field(
      default_factory=lambda: common.NormActivation(
          activation='relu', use_sync_bn=False))
125

Hye Yoon's avatar
Hye Yoon committed
126
127
128
129
130
131

@dataclasses.dataclass
class Losses(hyperparams.Config):
  """Loss config for the YT8M classification task."""
  name: str = 'binary_crossentropy'
  from_logits: bool = False
  label_smoothing: float = 0.0
  # Weight decay applied as an L2 loss term; see also MoeModel.l2_penalty.
  l2_weight_decay: float = 1e-5
Hye Yoon's avatar
Hye Yoon committed
133

134

Hye Yoon's avatar
Hye Yoon committed
135
136
137
@dataclasses.dataclass
class YT8MTask(cfg.TaskConfig):
  """The task config.

  Bundles model, data, and loss configs for the YT8M experiment.
  """
  # Use `default_factory` instead of instance defaults. In the original code
  # `yt8m(is_training=True)` was evaluated once at class-definition time, so
  # every YT8MTask instance shared the same DataConfig object — mutating one
  # task's data config silently mutated all others. Factories build a fresh
  # config per instance; effective default values are unchanged.
  model: DbofModel = dataclasses.field(default_factory=DbofModel)
  train_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=True))
  validation_data: DataConfig = dataclasses.field(
      default_factory=lambda: yt8m(is_training=False))
  losses: Losses = dataclasses.field(default_factory=Losses)
  gradient_clip_norm: float = 1.0
  num_readers: int = 8
  top_k: int = 20
  top_n: Optional[int] = None


def add_trainer(
    experiment: cfg.ExperimentConfig,
    train_batch_size: int,
    eval_batch_size: int,
    learning_rate: float = 0.0001,
    train_epochs: int = 50,
    num_train_examples: int = YT8M_TRAIN_EXAMPLES,
    num_val_examples: int = YT8M_VAL_EXAMPLES,
):
  """Add and config a trainer to the experiment config.

  Args:
    experiment: experiment config to mutate in place.
    train_batch_size: global train batch size.
    eval_batch_size: global eval batch size.
    learning_rate: initial learning rate of the exponential schedule.
    train_epochs: number of epochs to train for.
    num_train_examples: train dataset size; must be positive.
    num_val_examples: validation dataset size; must be positive.

  Returns:
    The same `experiment`, with `trainer` and data batch sizes filled in.

  Raises:
    ValueError: if either dataset size is not positive.
  """
  if num_train_examples <= 0:
    raise ValueError(
        f'Wrong train dataset size {experiment.task.train_data!r}')
  if num_val_examples <= 0:
    raise ValueError(
        f'Wrong validation dataset size {experiment.task.validation_data!r}')

  experiment.task.train_data.global_batch_size = train_batch_size
  experiment.task.validation_data.global_batch_size = eval_batch_size

  steps_per_epoch = num_train_examples // train_batch_size
  steps_per_loop = 500

  # Exponential decay every 1.5 epochs, offset past a 500-step linear warmup.
  lr_config = {
      'type': 'exponential',
      'exponential': {
          'initial_learning_rate': learning_rate,
          'decay_rate': 0.95,
          'decay_steps': int(steps_per_epoch * 1.5),
          'offset': 500,
      },
  }
  warmup_config = {
      'type': 'linear',
      'linear': {
          'name': 'linear',
          'warmup_learning_rate': 0,
          'warmup_steps': 500,
      },
  }
  experiment.trainer = cfg.TrainerConfig(
      steps_per_loop=steps_per_loop,
      summary_interval=steps_per_loop,
      checkpoint_interval=steps_per_loop,
      train_steps=train_epochs * steps_per_epoch,
      validation_steps=num_val_examples // eval_batch_size,
      validation_interval=steps_per_loop,
      optimizer_config=optimization.OptimizationConfig({
          'optimizer': {
              'type': 'adam',
              'adam': {}
          },
          'learning_rate': lr_config,
          'warmup': warmup_config,
      }))
  return experiment

200

Hye Yoon's avatar
Hye Yoon committed
201
202
203
204
@exp_factory.register_config_factory('yt8m_experiment')
def yt8m_experiment() -> cfg.ExperimentConfig:
  """Video classification general."""
  exp_config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YT8MTask(),
      trainer=cfg.TrainerConfig(),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None',
          'task.train_data.num_classes == task.validation_data.num_classes',
          'task.train_data.feature_sizes != None',
          'task.train_data.feature_names != None',
          'task.train_data.feature_sources != None',
          'task.train_data.feature_dtypes != None',
      ])

  # Per TPUv3 Core batch size 16GB HBM. `batch_multiplier` in range(1, 26).
  batch_multiplier = 1
  tpu_cores = 32  # for TPUv3 4x4
  per_core_train_bs = 32 * batch_multiplier
  per_core_eval_bs = 4 * 50  # multiplier<=100
  global_train_bs = per_core_train_bs * tpu_cores
  global_eval_bs = per_core_eval_bs * tpu_cores
  # Linearly scale the base lr=0.0001, which was tuned for bs=512.
  return add_trainer(
      exp_config,
      train_batch_size=global_train_bs,
      eval_batch_size=global_eval_bs,
      learning_rate=0.0001 * (global_train_bs / 512),
      train_epochs=100)