Commit ff676f0b authored by Vishnu Banna

large models

parent cf3bb8cf
# --experiment_type=large_yolo_finetune
# mAP 51.1%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [896, 896, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 5
        min_level: 3
        width_scale: 1.00
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '5': 0.4
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
              box: [48,102], box: [119,96], box: [97,189], box: [217,184],
              box: [171,384], box: [324,451], box: [616,618], box: [800,800]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.2
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.2
        aug_scale_max: 1.8
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
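
These YAML files are overrides for the experiments registered in the config factory further down in this commit; the name in each header comment is the registered experiment. A minimal sketch of applying one override, assuming the standard Model Garden exp_factory and hyperparams helpers (the YAML path here is hypothetical):

from official.core import exp_factory
from official.modeling import hyperparams

# Build the registered 'large_yolo_finetune' experiment, then overlay the
# YAML file above on top of its defaults.
config = exp_factory.get_exp_config('large_yolo_finetune')
config = hyperparams.override_params_dict(
    config, 'experiments/large_896_finetune.yaml', is_strict=True)
config.validate()
print(config.task.model.input_size)  # [896, 896, 3]
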
# --experiment_type=large_yolo_finetune
# mAP 54.4%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [1280, 1280, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 6
        min_level: 3
        width_scale: 1.00
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '6': 0.1
        '5': 0.4
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
              box: [61,45], box: [48,102], box: [119,96], box: [97,189],
              box: [97,189], box: [217,184], box: [171,384], box: [324,451],
              box: [324,451], box: [545,357], box: [616,618], box: [1024,1024]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.2
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.2
        aug_scale_max: 1.8
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
# --experiment_type=large_yolo
# mAP 55.3%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [1536, 1536, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 7
        min_level: 3
        width_scale: 1.25
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '7': 0.1
        '6': 0.4
        '5': 0.5
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [22,25], box: [55,41], box: [27,66],
              box: [57,88], box: [112,69], box: [69,177], box: [136,138],
              box: [136,138], box: [287,114], box: [134,275], box: [268,248],
              box: [268,248], box: [232,504], box: [445,416], box: [640,640],
              box: [812,393], box: [477,808], box: [1070,908], box: [1408,1408]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.2
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.2
        aug_scale_max: 1.8
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
# --experiment_type=scaled_yolo
# mAP 47.6%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [640, 640, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'altered_cspdarknet53'
        max_level: 5
        min_level: 3
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.3
      object_normalizer:
        '5': 0.28
        '4': 0.70
        '3': 2.80
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 3
      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
              box: [36, 75], box: [76, 55], box: [72, 146],
              box: [142, 110], box: [192, 243], box: [459, 401]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.25
        aug_scale_min: 0.1
        aug_scale_max: 1.9
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.1
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
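
For reference, with anchors_per_scale: 3 and levels 3 through 5, the nine boxes above are consumed in groups of three per level, with the smallest anchors assigned to the finest feature map (the usual YOLO convention, assumed here rather than stated in the file); a quick sketch of that grouping:

# Sketch of how a flat anchor list maps to FPN levels, assuming the usual
# YOLO ordering (smallest anchors go to the highest-resolution level).
anchors = [[12, 16], [19, 36], [40, 28],
           [36, 75], [76, 55], [72, 146],
           [142, 110], [192, 243], [459, 401]]
anchors_per_scale = 3
min_level, max_level = 3, 5

per_level = {
    str(level): anchors[i * anchors_per_scale:(i + 1) * anchors_per_scale]
    for i, level in enumerate(range(min_level, max_level + 1))
}
print(per_level['3'])  # [[12, 16], [19, 36], [40, 28]]
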
# --experiment_type=large_yolo
# mAP 50.5%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float16'
  tpu_enable_xla_dynamic_padder: false
  num_gpus: 1
task:
  model:
    input_size: [896, 896, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 5
        min_level: 3
        width_scale: 1.00
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '5': 0.4
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
              box: [48,102], box: [119,96], box: [97,189], box: [217,184],
              box: [171,384], box: [324,451], box: [616,618], box: [800,800]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.5
        aug_scale_max: 1.5
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
# --experiment_type=large_yolo
# mAP 53.4%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [1280, 1280, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 6
        min_level: 3
        width_scale: 1.00
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '6': 0.1
        '5': 0.4
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [31,25], box: [24,51], box: [61,45],
              box: [61,45], box: [48,102], box: [119,96], box: [97,189],
              box: [97,189], box: [217,184], box: [171,384], box: [324,451],
              box: [324,451], box: [545,357], box: [616,618], box: [1024,1024]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.5
        aug_scale_max: 1.5
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
# --experiment_type=large_yolo
# mAP 54.6%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [1536, 1536, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'csp-large'
        max_level: 7
        min_level: 3
        width_scale: 1.25
        depth_scale: 1.00
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp_large
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.65
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.5
      object_normalizer:
        '7': 0.1
        '6': 0.4
        '5': 0.5
        '4': 1.0
        '3': 4.0
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 4
      boxes: [box: [13,17], box: [22,25], box: [55,41], box: [27,66],
              box: [57,88], box: [112,69], box: [69,177], box: [136,138],
              box: [136,138], box: [287,114], box: [134,275], box: [268,248],
              box: [268,248], box: [232,504], box: [445,416], box: [640,640],
              box: [812,393], box: [477,808], box: [1070,908], box: [1408,1408]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.0
        aug_scale_min: 0.5
        aug_scale_max: 1.5
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.5
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
runtime:
  distribution_strategy: 'mirrored'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 1001
    input_size: [256, 256, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'cspdarknet53'
    norm_activation:
      activation: 'mish'
  losses:
    l2_weight_decay: 0.0005
    one_hot: true
    label_smoothing: 0.1
  train_data:
    input_path: 'imagenet-2012-tfrecord/train*'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
  validation_data:
    input_path: 'imagenet-2012-tfrecord/valid*'
    is_training: true
    global_batch_size: 128
    dtype: 'float16'
    drop_remainder: false
trainer:
  train_steps: 1200000  # epochs: 120
  validation_steps: 400  # size of validation data
  validation_interval: 10000
  steps_per_loop: 10000
  summary_interval: 10000
  checkpoint_interval: 10000
  optimizer_config:
    optimizer:
      type: 'sgd'
      sgd:
        momentum: 0.9
    learning_rate:
      type: 'polynomial'
      polynomial:
        initial_learning_rate: 0.1
        end_learning_rate: 0.0001
        power: 4.0
        decay_steps: 1200000
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1000  # learning rate rises from 0 to 0.1 over 1000 steps
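
A quick sanity check of the step counts above (a sketch, assuming the standard ImageNet-2012 train split of 1,281,167 images): at a global batch size of 128 an epoch is about 10,009 steps, so 1,200,000 train and decay steps is roughly the 120 epochs noted in the inline comment:

# Worked arithmetic for the classification trainer schedule above.
IMAGENET_TRAIN_EXAMPLES = 1281167  # assumed ImageNet-2012 train split size
global_batch_size = 128

steps_per_epoch = IMAGENET_TRAIN_EXAMPLES // global_batch_size
print(steps_per_epoch)            # 10009
print(1200000 / steps_per_epoch)  # ~119.9 epochs
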
@@ -508,3 +508,226 @@ def scaled_yolo() -> cfg.ExperimentConfig:
      ])
  return config


@exp_factory.register_config_factory('large_yolo')
def large_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  fine_tune_epochs = 450
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True)),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.9,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'weight_decay': 0.0005 * train_batch_size / 64.0,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': fine_tune_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
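
One detail worth calling out: both factories set the cosine decay_steps from fine_tune_epochs (450) rather than train_epochs (300), presumably so a fine-tune run continues along the same learning-rate curve the base run stopped on. A small sketch of that schedule, assuming standard Keras CosineDecay semantics with an alpha floor (COCO_TRAIN_EXAMPLES is 118,287 in the Model Garden):

import math

# Sketch of the cosine-with-floor schedule configured above (not library code).
def cosine_lr(step, decay_steps, initial_lr=0.01, alpha=0.2):
  step = min(step, decay_steps)
  cosine = 0.5 * (1.0 + math.cos(math.pi * step / decay_steps))
  return initial_lr * ((1.0 - alpha) * cosine + alpha)

steps_per_epoch = 118287 // 64       # COCO_TRAIN_EXAMPLES // train_batch_size
decay_steps = 450 * steps_per_epoch  # fine_tune_epochs * steps_per_epoch
print(cosine_lr(0, decay_steps))                      # 0.01 at the start of training
print(cosine_lr(300 * steps_per_epoch, decay_steps))  # LR where the 300-epoch run ends
print(cosine_lr(decay_steps, decay_steps))            # floor: alpha * initial_lr = 0.002
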


@exp_factory.register_config_factory('large_yolo_finetune')
def large_yolo_finetune() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  fine_tune_epochs = 450
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True)),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.2,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=fine_tune_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.9,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'weight_decay': 0.0005 * train_batch_size / 64.0,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': fine_tune_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
\ No newline at end of file
@@ -51,7 +51,7 @@ YOLO_MODELS = {
                 csp_stack=7,
                 fpn_depth=7,
                 path_process_len=8,
-                fpn_filter_scale=2),
+                fpn_filter_scale=1),
         ),
     'v3':
         dict(
@@ -349,13 +349,16 @@ class YoloPAN(tf.keras.layers.Layer):
     downsample = False
     upsample = True
-    if self._csp_stack == 0:
-      proc_filters = lambda x: x
-      resample_filters = lambda x: x // 2
-    else:
-      proc_filters = lambda x: x * 2
-      resample_filters = lambda x: x
     for level, depth in zip(self._iterator, self._depths):
+      if level > 5:
+        proc_filters = lambda x: x * 2
+        resample_filters = lambda x: x // 2
+      elif self._csp_stack == 0:
+        proc_filters = lambda x: x
+        resample_filters = lambda x: x // 2
+      else:
+        proc_filters = lambda x: x * 2
+        resample_filters = lambda x: x
       if level == self._input:
         self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
             filters=proc_filters(depth),
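
Restated outside the layer for readability (a sketch, not the library code), the new branch above makes the PAN filter scaling depend on the level as well as csp_stack: levels above 5 now halve the resample filters and double the process filters unconditionally, while lower levels keep the previous behaviour:

# Standalone mirror of the per-level filter rule added in YoloPAN above.
def pan_filters(level, csp_stack, depth):
  if level > 5:
    return depth * 2, depth // 2  # proc_filters, resample_filters
  if csp_stack == 0:
    return depth, depth // 2
  return depth * 2, depth

print(pan_filters(5, csp_stack=7, depth=512))  # (1024, 512), unchanged behaviour
print(pan_filters(7, csp_stack=7, depth=512))  # (1024, 256), new P6/P7 behaviour
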
@@ -396,7 +399,7 @@
     depths = []
     if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
       for i in range(self._min_level, self._max_level + 1):
-        depths.append(inputs[str(i)][-1] * 2)
+        depths.append(inputs[str(i)][-1])  # * 2)
     else:
       for _ in range(self._min_level, self._max_level + 1):
         depths.append(minimum_depth)
@@ -605,4 +608,4 @@ def build_yolo_decoder(
   base_model.update(base_dict)

   model = YoloDecoder(input_specs, **base_model, **kwargs)
-  return model
+  return model
\ No newline at end of file