Commit d4fb52e7 authored by Vishnu Banna's avatar Vishnu Banna
Browse files

model builds

parent c631af40
...@@ -16,6 +16,21 @@
# pylint: disable=unused-import
from official.common import registry_imports
# import configs
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs import yolo
# import modeling components
from official.vision.beta.projects.yolo.modeling.backbones import darknet
from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder
# import tasks
from official.vision.beta.projects.yolo.tasks import image_classification
from official.vision.beta.projects.yolo.tasks import yolo
# import optimization packages
from official.vision.beta.projects.yolo.optimization import optimizer_factory
from official.vision.beta.projects.yolo.optimization.configs import learning_rate_config
from official.vision.beta.projects.yolo.optimization.configs import optimization_config
from official.vision.beta.projects.yolo.optimization.configs import optimizer_config
\ No newline at end of file
"""Backbones configurations."""
# Import libraries
import dataclasses
from typing import Optional, List
from official.modeling import hyperparams
from official.vision.beta.configs import decoders
@dataclasses.dataclass
class YoloDecoder(hyperparams.Config):
  """Parameterization of the YOLO decoder.

  If `version` (or a named preset) is specified, the remaining input
  parameters are ignored and the defaults registered for that version
  and name are used instead.
  """
  version: Optional[str] = None  # preset family, e.g. 'v3'/'v4' in the configs below
  type: Optional[str] = None  # variant within a version, e.g. 'regular' or 'csp'
  use_fpn: Optional[bool] = None
  use_spatial_attention: bool = False
  use_separable_conv: bool = False
  csp_stack: Optional[bool] = None
  fpn_depth: Optional[int] = None
  fpn_filter_scale: Optional[int] = None
  path_process_len: Optional[int] = None
  max_level_process_len: Optional[int] = None
  embed_spp: Optional[bool] = None
  # 'same' presumably means "match the model-wide activation" — TODO confirm
  # against the decoder builder.
  activation: Optional[str] = 'same'
@dataclasses.dataclass
class Decoder(decoders.Decoder):
  """Decoder config that registers the YOLO decoder as a selectable type.

  Attributes:
    type: which decoder to build; defaults to the YOLO decoder.
    yolo_decoder: parameters used when `type == 'yolo_decoder'`.
  """
  type: Optional[str] = 'yolo_decoder'
  # default_factory so every Decoder instance owns its own YoloDecoder
  # instead of all instances sharing one mutable default object.
  yolo_decoder: YoloDecoder = dataclasses.field(default_factory=YoloDecoder)
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: leaky
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: leaky
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.1
model:
darknet_based_model: False
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 1
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
letter_box: True
random_flip: True
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
global_batch_size: 1
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
\ No newline at end of file
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
smart_bias_lr: 0.1
model:
darknet_based_model: False
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.97
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
shuffle_buffer_size: 10000
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 4.0
best_match_only: True
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
shuffle_buffer_size: 10
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 4.0
best_match_only: True
weight_decay: 0.000
annotation_file: null
trainer:
train_steps: 555000 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 1850
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9999
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: cosine
cosine:
initial_learning_rate: 0.01
name: Cosine
alpha: 0.2
decay_steps: 555000
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.937
momentum_start: 0.8
nesterov: True
warmup_steps: 5550
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 5550  # learning rate rises from 0 to 0.01 over 5550 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
input_path: 'gs://cam2-datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: True
random_flip: True
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: 'gs://cam2-datasets/coco/val*'
\ No newline at end of file
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'swin'
swin:
min_level: 3
max_level: 5
patch_size: 4
embed_dims: 96
window_size: [7, 7, 7, 7]
depths: [2, 2, 6, 2]
num_heads: [3, 6, 12, 24]
drop_path: 0.0
absolute_positional_embed: False
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: gelu
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: false
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 4
dtype: float16
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.6
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 1.0
aug_scale_max: 1.0
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float16
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: '../checkpoints/swin-baseline-3'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 10
summary_interval: 10
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema: null
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
# global_batch_size: 64
# dtype: float32
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
# is_training: true
# drop_remainder: true
# seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_translate: 0.1
random_pad: False
validation_data:
# global_batch_size: 1
# dtype: float32
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
# is_training: false
# drop_remainder: true
# parser:
# max_num_instances: 200
# letter_box: True
# use_tie_breaker: True
# anchor_thresh: 0.213
# weight_decay: 0.000
# init_checkpoint: '../checkpoints/512-wd-baseline-e1'
# init_checkpoint_modules: 'all'
# annotation_file: null
# trainer:
# train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
# validation_steps: 625
# steps_per_loop: 1850
# summary_interval: 1850
# validation_interval: 9250
# checkpoint_interval: 1850
# optimizer_config:
# ema:
# average_decay: 0.9998
# trainable_weights_only: False
# dynamic_decay: True
# learning_rate:
# type: stepwise
# stepwise:
# boundaries: [400000, 450000]
# name: PiecewiseConstantDecay
# values: [0.00131, 0.000131, 0.0000131]
# optimizer:
# type: sgd_torch
# sgd_torch:
# momentum: 0.949
# momentum_start: 0.949
# nesterov: True
# warmup_steps: 1000
# weight_decay: 0.0005
# sim_torch: true
# name: SGD
# warmup:
# type: 'linear'
# linear:
# warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
input_path: 'gs://cam2-datasets/coco/train*'
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_translate: 0.1
random_pad: False
validation_data:
input_path: 'gs://cam2-datasets/coco/val*'
parser:
letter_box: False
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
# trainer:
# train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
# validation_steps: 625
# steps_per_loop: 1850
# summary_interval: 1850
# validation_interval: 9250
# checkpoint_interval: 1850
# optimizer_config:
# ema:
# average_decay: 0.9998
# trainable_weights_only: False
# dynamic_decay: True
# learning_rate:
# type: stepwise
# stepwise:
# boundaries: [400000, 450000]
# values: [0.00131, 0.000131, 0.0000131]
# optimizer:
# type: sgd_torch
# sgd_torch:
# momentum: 0.949
# momentum_start: 0.949
# nesterov: True
# warmup_steps: 1000
# weight_decay: 0.0005
# sim_torch: true
# name: SGD
# warmup:
# type: 'linear'
# linear:
# warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'swin'
swin:
min_level: 3
max_level: 5
patch_size: 4
embed_dims: 96
window_size: [7, 7, 7, 7]
depths: [2, 2, 6, 2]
num_heads: [3, 6, 12, 24]
drop_path: 0.0
absolute_positional_embed: False
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema: null
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""YOLO configuration definition."""
from typing import List, Optional, Union
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.projects.yolo import optimization
from official.vision.beta.projects.yolo.configs import backbones
from official.vision.beta.projects.yolo.configs import decoders
import numpy as np
import dataclasses
MIN_LEVEL = 1
MAX_LEVEL = 7
def _build_dict(min_level, max_level, value):
vals = {str(key): value for key in range(min_level, max_level + 1)}
vals["all"] = None
return lambda: vals
def _build_path_scales(min_level, max_level):
return lambda: {str(key): 2**key for key in range(min_level, max_level + 1)}
@dataclasses.dataclass
class FPNConfig(hyperparams.Config):
  """Config holding one value per FPN level plus an 'all' broadcast key."""

  def get(self):
    """Return the per-level dict, broadcasting 'all' over every level if set."""
    values = self.as_dict()
    fill = values.get("all")
    if fill is not None:
      for key in values:
        if key != "all":
          values[key] = fill
    return values
# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  """Config for the plain tf.Example data decoder."""
  regenerate_source_id: bool = False
  coco91_to_80: bool = True  # remap COCO's 91 raw class ids to the 80 used here
@dataclasses.dataclass
class TfExampleDecoderLabelMap(hyperparams.Config):
  """Config for a tf.Example decoder that uses an external label map file."""
  regenerate_source_id: bool = False
  label_map: str = ''  # path to the label map; empty means unset
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  """One-of config selecting which tf.Example decoder to use.

  Attributes:
    type: the selected decoder; defaults to the simple decoder.
    simple_decoder: options for the plain tf.Example decoder.
    label_map_decoder: options for the label-map-based decoder.
  """
  type: Optional[str] = 'simple_decoder'
  # default_factory so each DataDecoder owns its own sub-config instances
  # rather than all instances sharing one mutable default object.
  simple_decoder: TfExampleDecoder = dataclasses.field(
      default_factory=TfExampleDecoder)
  label_map_decoder: TfExampleDecoderLabelMap = dataclasses.field(
      default_factory=TfExampleDecoderLabelMap)
@dataclasses.dataclass
class Mosaic(hyperparams.Config):
  """Parameters for mosaic/mixup image-aggregation augmentation.

  A frequency of 0.0 disables the corresponding augmentation; a
  `mosaic_crop_mode` of None skips the post-mosaic crop (modes seen in the
  experiment configs: 'crop', 'scale').
  """
  mosaic_frequency: float = 0.0
  mixup_frequency: float = 0.0
  mosaic_center: float = 0.2
  mosaic_crop_mode: Optional[str] = None
  aug_scale_min: float = 1.0  # min/max of 1.0 means no random scaling
  aug_scale_max: float = 1.0
  jitter: float = 0.0
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Parameters for YOLO input parsing, augmentation and anchor matching.

  Augmentation strengths of 0.0 disable the corresponding transform.
  """
  max_num_instances: int = 200  # max boxes retained per image
  letter_box: Optional[bool] = True
  random_flip: bool = True
  # Fixed annotation: this is a boolean toggle, not a float (the original
  # declared `float = False`).
  random_pad: bool = False
  jitter: float = 0.0
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_saturation: float = 0.0
  aug_rand_brightness: float = 0.0
  aug_rand_hue: float = 0.0
  aug_rand_angle: float = 0.0
  aug_rand_translate: float = 0.0
  aug_rand_perspective: float = 0.0
  use_tie_breaker: bool = True
  best_match_only: bool = False
  # Negative value presumably disables anchor thresholding — TODO confirm
  # against the anchor-matching code.
  anchor_thresh: float = -0.01
  area_thresh: float = 0.1
  # default_factory so each Parser owns its own Mosaic config instead of all
  # instances sharing one mutable default object.
  mosaic: Mosaic = dataclasses.field(default_factory=Mosaic)
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for YOLO training and evaluation data."""
  input_path: str = ''
  # None means "not using TFDS"; the original annotated these as plain `str`.
  tfds_name: Optional[str] = None
  tfds_split: Optional[str] = None
  # The original declared `global_batch_size` twice (64, then 1); the later
  # declaration was the effective default, so 1 is kept.
  global_batch_size: int = 1
  is_training: bool = True
  dtype: str = 'float16'
  # default_factory so each DataConfig owns its own decoder/parser sub-configs
  # rather than all instances sharing one mutable default object.
  decoder: DataDecoder = dataclasses.field(default_factory=DataDecoder)
  parser: Parser = dataclasses.field(default_factory=Parser)
  shuffle_buffer_size: int = 10000
  tfds_download: bool = True
  cache: bool = False
@dataclasses.dataclass
class YoloHead(hyperparams.Config):
  """Parameterization for the YOLO Head."""
  # Enables the smart-bias initialization toggled by `smart_bias` in the
  # experiment configs above.
  smart_bias: bool = True
@dataclasses.dataclass
class YoloDetectionGenerator(hyperparams.Config):
  """Parameterization of box decoding and NMS for detection generation."""
  # Per-FPN-level box decoding type, 'original' at every level by default.
  box_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, "original"))
  # Per-level scale applied to predicted x/y offsets, 1.0 by default.
  scale_xy: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level stride map built by _build_path_scales (defined earlier).
  path_scales: FPNConfig = dataclasses.field(
      default_factory=_build_path_scales(MIN_LEVEL, MAX_LEVEL))
  # NMS algorithm selector.
  nms_type: str = 'greedy'
  # Confidence threshold for keeping candidates before NMS.
  iou_thresh: float = 0.001
  # IoU overlap threshold used by NMS to suppress duplicates.
  nms_thresh: float = 0.6
  # Maximum number of detections returned per image.
  max_boxes: int = 200
  # Number of top-scoring candidates retained prior to NMS.
  pre_nms_points: int = 5000
@dataclasses.dataclass
class YoloLoss(hyperparams.Config):
  """Parameterization of the YOLO loss; most fields are per-FPN-level maps."""
  # Per-level IoU thresholds -- presumably control which predictions are
  # ignored/forced-matched in the objectness loss; TODO(review): confirm
  # against the loss implementation.
  ignore_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  truth_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level box regression loss type, 'ciou' at every level by default.
  box_loss_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'ciou'))
  # Per-level weights for the IoU, classification, and objectness terms.
  iou_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  cls_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  obj_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level max delta; np.inf disables the clamp.
  max_delta: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf))
  # Per-level objectness smoothing factor; 0.0 disables smoothing.
  objectness_smooth: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  label_smoothing: float = 0.0
  # Selects the scaled-YOLO loss formulation over the original darknet one
  # (the yolo_darknet experiment below sets this to False).
  use_scaled_loss: bool = True
  update_on_repeat: bool = True
@dataclasses.dataclass  # was missing: every sibling config class carries it
class Box(hyperparams.Config):
  """A single anchor-box prior.

  `box` holds the prior's dimensions as integers -- presumably
  [width, height] in pixels; TODO(review): confirm against the anchor
  generation code (AnchorBoxes.get reads `box.box` directly).
  """
  # `default_factory=list` (not `default=list`): the original passed the
  # `list` type object itself as the default value instead of producing a
  # fresh empty list per instance.
  box: List[int] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class AnchorBoxes(hyperparams.Config):
  """Anchor-box priors shared across FPN levels."""
  # Explicit anchor priors; must be set unless `level_limits` is provided,
  # since get() reads `box.box` from each entry.
  boxes: Optional[List[Box]] = None
  # When set, get() substitutes one unit box [1.0, 1.0] per level instead of
  # using `boxes`.
  level_limits: Optional[List[int]] = None
  anchors_per_scale: int = 3
  def get(self, min_level, max_level):
    """Return ({str(level): anchors}, level_limits) for the level range.

    Args:
      min_level: lowest FPN level, inclusive.
      max_level: highest FPN level, inclusive.

    Returns:
      A tuple of (dict mapping level string to its slice of anchor boxes,
      self.level_limits).
    """
    if self.level_limits is None:
      boxes = [box.box for box in self.boxes]
    else:
      boxes = [[1.0, 1.0]] * ((max_level - min_level) + 1)
      # NOTE(review): side effect -- anchors_per_scale is overwritten in
      # place so the slicing below yields exactly one box per level.
      self.anchors_per_scale = 1
    anchors_per_level = dict()
    start = 0
    # Hand each level its consecutive chunk of `anchors_per_scale` boxes.
    for i in range(min_level, max_level + 1):
      anchors_per_level[str(i)] = boxes[start:start + self.anchors_per_scale]
      start += self.anchors_per_scale
    return anchors_per_level, self.level_limits
@dataclasses.dataclass
class Yolo(hyperparams.Config):
  """Top-level YOLO model config: backbone, decoder, head, and loss."""
  # Input [height, width, channels]; 512x512 RGB by default.
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  # CSP-Darknet53 backbone by default.
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet(model_id='cspdarknet53'))
  # YOLOv4 "regular" FPN/PAN decoder by default.
  decoder: decoders.Decoder = decoders.Decoder(
      type='yolo_decoder', yolo_decoder=decoders.YoloDecoder(
          version='v4', type='regular'
      )
  )
  head: YoloHead = YoloHead()
  detection_generator: YoloDetectionGenerator = YoloDetectionGenerator()
  loss: YoloLoss = YoloLoss()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='mish',
      use_sync_bn=True,
      norm_momentum=0.99,
      norm_epsilon=0.001)
  # COCO's 80 object categories by default.
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = AnchorBoxes()
  # Presumably selects the original darknet-style model construction over
  # the scaled variant (set True by the yolo_darknet experiment below);
  # TODO(review): confirm against the model builder.
  darknet_based_model: bool = False
@dataclasses.dataclass
class YoloTask(cfg.TaskConfig):
  """YOLO detection task config: model, data, and checkpoint options."""
  per_category_metrics: bool = False
  # Presumably a learning-rate scale for the head's smart-bias variables;
  # 0.0 disables it. TODO(review): confirm against the task implementation.
  smart_bias_lr: float = 0.0
  model: Yolo = Yolo()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  weight_decay: float = 0.0
  # Optional COCO-style annotation file used for evaluation.
  annotation_file: Optional[str] = None
  # Checkpoint to initialize from, and which modules to restore.
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  # Global-norm gradient clipping; 0.0 disables clipping.
  gradient_clip_norm: float = 0.0
# COCO dataset constants consumed by the experiment factories below.
COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
# Seed forwarded to the training DataConfig for reproducible input pipelines.
GLOBAL_SEED = 1000
@exp_factory.register_config_factory('yolo')
def yolo() -> cfg.ExperimentConfig:
  """Yolo general config."""
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None'
  ]
  # Bare task with all defaults; callers override fields via the yaml/flags.
  return cfg.ExperimentConfig(task=YoloTask(), restrictions=restrictions)
@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4"""
  # Training recipe hyperparameters.
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  validation_interval = 5
  max_num_instances = 200
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint='',
          init_checkpoint_modules='backbone',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              # Original darknet formulation: unscaled loss, darknet model.
              darknet_based_model = True,
              norm_activation=common.NormActivation(use_sync_bn=True),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True)),
          train_data=DataConfig(
              is_training=True,
              global_batch_size=train_batch_size,
              seed=GLOBAL_SEED,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  aug_rand_saturation= 1.5,
                  aug_rand_brightness= 1.5,
                  aug_rand_hue= 0.1,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.213,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_frequency= 0.75,
                      mixup_frequency= 0.0,
                      mosaic_crop_mode= 'crop',
                      mosaic_center= 0.2
                  )
              )),
          validation_data=DataConfig(
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.213,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema':{
                  'average_decay': 0.9998,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.949,
                      'momentum_start': 0.949,
                      'nesterov': True,
                      'warmup_steps': 1000,
                      'weight_decay': 0.0005,
                      'sim_torch': True,
                  }
              },
              'learning_rate': {
                  # Stepwise schedule: 10x drops at epochs 240 and 270, with
                  # values linearly scaled by batch size relative to 64.
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [240 * steps_per_epoch, 270*steps_per_epoch],
                      'values': [
                          0.00131 * train_batch_size / 64.0,
                          0.000131 * train_batch_size / 64.0,
                          0.0000131 * train_batch_size / 64.0
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 1000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4"""
  # Training recipe hyperparameters.
  train_batch_size = 128
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules=None,
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              # Scaled-YOLO formulation: scaled loss, non-darknet model.
              darknet_based_model = False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True)),
          train_data=DataConfig(
              is_training=True,
              global_batch_size=train_batch_size,
              seed=GLOBAL_SEED,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation = 0.7,
                  aug_rand_brightness = 0.4,
                  aug_rand_hue = 0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  )
              )),
          validation_data=DataConfig(
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema':{
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      # Weight decay scaled linearly with batch size.
                      'weight_decay': 0.0005 * train_batch_size/64.0,
                      'sim_torch': True,
                  }
              },
              'learning_rate': {
                  # Cosine decay over the full training run.
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
\ No newline at end of file
...@@ -383,7 +383,7 @@ class Darknet(tf.keras.Model): ...@@ -383,7 +383,7 @@ class Darknet(tf.keras.Model):
max_level=5, max_level=5,
width_scale=1.0, width_scale=1.0,
depth_scale=1.0, depth_scale=1.0,
use_reorg_input=False, use_reorg_input = False,
csp_level_mod=(), csp_level_mod=(),
activation=None, activation=None,
use_sync_bn=False, use_sync_bn=False,
...@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model): ...@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):
def _build_struct(self, net, inputs): def _build_struct(self, net, inputs):
if self._use_reorg_input: if self._use_reorg_input:
inputs = nn_blocks.Reorg()(inputs) inputs = nn_blocks.Reorg()(inputs)
net[0].filters = net[1].filters
net[0].output_name = net[1].output_name
del net[1]
endpoints = collections.OrderedDict() endpoints = collections.OrderedDict()
stack_outputs = [inputs] stack_outputs = [inputs]
...@@ -666,7 +669,6 @@ class Darknet(tf.keras.Model): ...@@ -666,7 +669,6 @@ class Darknet(tf.keras.Model):
} }
return layer_config return layer_config
@factory.register_backbone_builder('darknet') @factory.register_backbone_builder('darknet')
def build_darknet( def build_darknet(
input_specs: tf.keras.layers.InputSpec, input_specs: tf.keras.layers.InputSpec,
......
...@@ -12,11 +12,66 @@ ...@@ -12,11 +12,66 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO.""" """Feature Pyramid Network and Path Aggregation variants used in YOLO."""
from typing import Mapping, Union
from official.modeling import hyperparams
import tensorflow as tf import tensorflow as tf
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
from official.vision.beta.modeling.decoders import factory
# model configurations
# the structure is as follows. model version, {v3, v4, v#, ... etc}
# the model config type {regular, tiny, small, large, ... etc}
YOLO_MODELS = {
"v4":
dict(
regular=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
csp=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=5,
fpn_depth=5,
path_process_len=6),
csp_large=dict(
embed_spp=False,
use_fpn=True,
max_level_process_len=None,
csp_stack=7,
fpn_depth=7,
path_process_len=8,
fpn_filter_scale=2),
),
"v3":
dict(
regular=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=None,
path_process_len=6),
tiny=dict(
embed_spp=False,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
spp=dict(
embed_spp=True,
use_fpn=False,
max_level_process_len=2,
path_process_len=1),
),
}
@tf.keras.utils.register_keras_serializable(package='yolo') @tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer): class _IdentityRoute(tf.keras.layers.Layer):
...@@ -487,3 +542,65 @@ class YoloDecoder(tf.keras.Model): ...@@ -487,3 +542,65 @@ class YoloDecoder(tf.keras.Model):
@classmethod @classmethod
def from_config(cls, config, custom_objects=None): def from_config(cls, config, custom_objects=None):
return cls(**config) return cls(**config)
@factory.register_decoder_builder('yolo_decoder')
def build_yolo_decoder(input_specs: Mapping[str, tf.TensorShape],
model_config: hyperparams.Config,
l2_regularizer: tf.keras.regularizers.Regularizer = None,
**kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
"""Builds Yolo FPN/PAN decoder from a config.
Args:
input_specs: A `dict` of input specifications. A dictionary consists of
{level: TensorShape} from a backbone.
model_config: A OneOfConfig. Model config.
l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
None.
Returns:
A `tf.keras.Model` instance of the Yolo FPN/PAN decoder.
"""
decoder_cfg = model_config.decoder.get()
norm_activation_config = model_config.norm_activation
activation = (
decoder_cfg.activation
if decoder_cfg.activation != "same" else
norm_activation_config.activation)
if decoder_cfg.version is None: # custom yolo
raise Exception("decoder version cannot be None, specify v3 or v4")
if decoder_cfg.version not in YOLO_MODELS:
raise Exception(
"unsupported model version please select from {v3, v4}, \n\n \
or specify a custom decoder config using YoloDecoder in you yaml")
if decoder_cfg.type == None:
decoder_cfg.type = "regular"
if decoder_cfg.type not in YOLO_MODELS[decoder_cfg.version]:
raise Exception("unsupported model type please select from \
{yolo_model.YOLO_MODELS[decoder_cfg.version].keys()},\
\n\n or specify a custom decoder config using YoloDecoder in you yaml")
base_model = YOLO_MODELS[decoder_cfg.version][decoder_cfg.type]
cfg_dict = decoder_cfg.as_dict()
for key in base_model:
if cfg_dict[key] is not None:
base_model[key] = cfg_dict[key]
base_dict = dict(
activation=activation,
use_spatial_attention=decoder_cfg.use_spatial_attention,
use_separable_conv=decoder_cfg.use_separable_conv,
use_sync_bn=norm_activation_config.use_sync_bn,
norm_momentum=norm_activation_config.norm_momentum,
norm_epsilon=norm_activation_config.norm_epsilon,
kernel_regularizer=l2_regularizer)
base_model.update(base_dict)
model = YoloDecoder(input_specs, **base_model)
return model
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment