Commit d4fb52e7 authored by Vishnu Banna's avatar Vishnu Banna
Browse files

model builds

parent c631af40
......@@ -16,6 +16,21 @@
# pylint: disable=unused-import
from official.common import registry_imports
# import configs
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs import yolo
# import modeling components
from official.vision.beta.projects.yolo.modeling.backbones import darknet
from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder
# import tasks
from official.vision.beta.projects.yolo.tasks import image_classification
from official.vision.beta.projects.yolo.tasks import yolo
# import optimization packages
from official.vision.beta.projects.yolo.optimization import optimizer_factory
from official.vision.beta.projects.yolo.optimization.configs import learning_rate_config
from official.vision.beta.projects.yolo.optimization.configs import optimization_config
from official.vision.beta.projects.yolo.optimization.configs import optimizer_config
\ No newline at end of file
"""Backbones configurations."""
# Import libraries
import dataclasses
from typing import Optional, List
from official.modeling import hyperparams
from official.vision.beta.configs import decoders
@dataclasses.dataclass
class YoloDecoder(hyperparams.Config):
  """Parameterization of the YOLO decoder.

  If `version` (and `type`) is specified, the structural parameters below
  are ignored and the registered defaults for that version/name are used
  instead.
  """
  version: Optional[str] = None
  type: Optional[str] = None
  use_fpn: Optional[bool] = None
  use_spatial_attention: bool = False
  use_separable_conv: bool = False
  csp_stack: Optional[bool] = None
  fpn_depth: Optional[int] = None
  fpn_filter_scale: Optional[int] = None
  path_process_len: Optional[int] = None
  max_level_process_len: Optional[int] = None
  embed_spp: Optional[bool] = None
  # NOTE(review): 'same' presumably means "use the model-wide activation";
  # confirm against the decoder builder.
  activation: Optional[str] = 'same'
@dataclasses.dataclass
class Decoder(decoders.Decoder):
  """Decoder OneOf config extended with the YOLO decoder option."""
  type: Optional[str] = 'yolo_decoder'
  # Use default_factory so each Decoder instance gets its own YoloDecoder;
  # a bare `YoloDecoder()` default is one shared mutable object across all
  # instances of this dataclass.
  yolo_decoder: YoloDecoder = dataclasses.field(default_factory=YoloDecoder)
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: leaky
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'darknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v3
type: regular
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: leaky
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.1
model:
darknet_based_model: False
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 1
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
letter_box: True
random_flip: True
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
global_batch_size: 1
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
\ No newline at end of file
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
smart_bias_lr: 0.1
model:
darknet_based_model: False
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.97
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
shuffle_buffer_size: 10000
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 4.0
best_match_only: True
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
shuffle_buffer_size: 10
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 4.0
best_match_only: True
weight_decay: 0.000
annotation_file: null
trainer:
  train_steps: 555000 # total train steps at global batch size 64
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 1850
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9999
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: cosine
cosine:
initial_learning_rate: 0.01
name: Cosine
alpha: 0.2
decay_steps: 555000
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.937
momentum_start: 0.8
nesterov: True
warmup_steps: 5550
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
        warmup_steps: 5550 # learning rate rises from 0 to 0.01 over 5550 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
obj_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
input_path: 'gs://cam2-datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: True
random_flip: True
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: 'gs://cam2-datasets/coco/val*'
\ No newline at end of file
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'swin'
swin:
min_level: 3
max_level: 5
patch_size: 4
embed_dims: 96
window_size: [7, 7, 7, 7]
depths: [2, 2, 6, 2]
num_heads: [3, 6, 12, 24]
drop_path: 0.0
absolute_positional_embed: False
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: gelu
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: false
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 4
dtype: float16
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.6
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 1.0
aug_scale_max: 1.0
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float16
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: '../checkpoints/swin-baseline-3'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 10
summary_interval: 10
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema: null
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float16'
num_gpus: 1
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.25
nms_thresh: 0.45
pre_nms_points: 500
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
# global_batch_size: 64
# dtype: float32
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/train*'
# is_training: true
# drop_remainder: true
# seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_translate: 0.1
random_pad: False
validation_data:
# global_batch_size: 1
# dtype: float32
input_path: '/media/vbanna/DATA_SHARE/CV/datasets/COCO_raw/records/val*'
# is_training: false
# drop_remainder: true
# parser:
# max_num_instances: 200
# letter_box: True
# use_tie_breaker: True
# anchor_thresh: 0.213
# weight_decay: 0.000
# init_checkpoint: '../checkpoints/512-wd-baseline-e1'
# init_checkpoint_modules: 'all'
# annotation_file: null
# trainer:
# train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
# validation_steps: 625
# steps_per_loop: 1850
# summary_interval: 1850
# validation_interval: 9250
# checkpoint_interval: 1850
# optimizer_config:
# ema:
# average_decay: 0.9998
# trainable_weights_only: False
# dynamic_decay: True
# learning_rate:
# type: stepwise
# stepwise:
# boundaries: [400000, 450000]
# name: PiecewiseConstantDecay
# values: [0.00131, 0.000131, 0.0000131]
# optimizer:
# type: sgd_torch
# sgd_torch:
# momentum: 0.949
# momentum_start: 0.949
# nesterov: True
# warmup_steps: 1000
# weight_decay: 0.0005
# sim_torch: true
# name: SGD
# warmup:
# type: 'linear'
# linear:
# warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
input_path: 'gs://cam2-datasets/coco/train*'
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_translate: 0.1
random_pad: False
validation_data:
input_path: 'gs://cam2-datasets/coco/val*'
parser:
letter_box: False
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
# trainer:
# train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
# validation_steps: 625
# steps_per_loop: 1850
# summary_interval: 1850
# validation_interval: 9250
# checkpoint_interval: 1850
# optimizer_config:
# ema:
# average_decay: 0.9998
# trainable_weights_only: False
# dynamic_decay: True
# learning_rate:
# type: stepwise
# stepwise:
# boundaries: [400000, 450000]
# values: [0.00131, 0.000131, 0.0000131]
# optimizer:
# type: sgd_torch
# sgd_torch:
# momentum: 0.949
# momentum_start: 0.949
# nesterov: True
# warmup_steps: 1000
# weight_decay: 0.0005
# sim_torch: true
# name: SGD
# warmup:
# type: 'linear'
# linear:
# warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'swin'
swin:
min_level: 3
max_level: 5
patch_size: 4
embed_dims: 96
window_size: [7, 7, 7, 7]
depths: [2, 2, 6, 2]
num_heads: [3, 6, 12, 24]
drop_path: 0.0
absolute_positional_embed: False
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 200
letter_box: True
random_flip: True
aug_rand_saturation: 0.7
aug_rand_brightness: 0.4
aug_rand_hue: 0.015
aug_rand_translate: 0.1
area_thresh: 0.1
random_pad: False
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 300
letter_box: True
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema: null
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
smart_bias_lr: 0.0
model:
darknet_based_model: True
input_size: [512, 512, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: regular
activation: leaky
head:
smart_bias: true
detection_generator:
box_type:
'all': original
scale_xy:
'5': 1.05
'4': 1.1
'3': 1.2
max_boxes: 200
nms_type: greedy
iou_thresh: 0.001
nms_thresh: 0.60
loss:
use_scaled_loss: False
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.7
iou_normalizer:
'all': 0.07
cls_normalizer:
'all': 1.0
obj_normalizer:
'all': 1.0
objectness_smooth:
'all': 0.0
max_delta:
'all': 5.0
norm_activation:
activation: mish
norm_epsilon: 0.0001
norm_momentum: 0.99
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
global_batch_size: 64
dtype: float32
input_path: 'gs://cam2-datasets/coco/train*'
is_training: true
drop_remainder: true
seed: 1000
parser:
mosaic:
mosaic_frequency: 0.75
mixup_frequency: 0.0
mosaic_crop_mode: 'crop'
mosaic_center: 0.2
aug_scale_min: 0.2
aug_scale_max: 1.6
jitter: 0.3
max_num_instances: 200
letter_box: False
random_flip: True
aug_rand_saturation: 1.5
aug_rand_brightness: 1.5
aug_rand_hue: 0.1
aug_scale_min: 0.1
aug_scale_max: 1.9
aug_rand_translate: 0.0
jitter: 0.3
area_thresh: 0.1
random_pad: True
use_tie_breaker: True
anchor_thresh: 0.213
validation_data:
global_batch_size: 8
dtype: float32
input_path: 'gs://cam2-datasets/coco/val*'
is_training: false
drop_remainder: true
parser:
max_num_instances: 200
letter_box: False
use_tie_breaker: True
anchor_thresh: 0.213
weight_decay: 0.000
init_checkpoint: 'gs://tensorflow2/darknet/cspdarknet53-golden'
init_checkpoint_modules: 'backbone'
annotation_file: null
trainer:
train_steps: 500500 # 160 epochs at 64 batchsize -> 500500 * 64/2
validation_steps: 625
steps_per_loop: 1850
summary_interval: 1850
validation_interval: 9250
checkpoint_interval: 1850
optimizer_config:
ema:
average_decay: 0.9998
trainable_weights_only: False
dynamic_decay: True
learning_rate:
type: stepwise
stepwise:
boundaries: [400000, 450000]
name: PiecewiseConstantDecay
values: [0.00131, 0.000131, 0.0000131]
optimizer:
type: sgd_torch
sgd_torch:
momentum: 0.949
momentum_start: 0.949
nesterov: True
warmup_steps: 1000
weight_decay: 0.0005
sim_torch: true
name: SGD
warmup:
type: 'linear'
linear:
warmup_steps: 1000 #learning rate rises from 0 to 0.0013 over 1000 steps
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""YOLO configuration definition."""
from typing import List, Optional, Union
from official.core import exp_factory
from official.modeling import hyperparams
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common
from official.vision.beta.projects.yolo import optimization
from official.vision.beta.projects.yolo.configs import backbones
from official.vision.beta.projects.yolo.configs import decoders
import numpy as np
import dataclasses
# Inclusive range of FPN levels for which per-level default dicts are built.
MIN_LEVEL = 1
MAX_LEVEL = 7
def _build_dict(min_level, max_level, value):
vals = {str(key): value for key in range(min_level, max_level + 1)}
vals["all"] = None
return lambda: vals
def _build_path_scales(min_level, max_level):
return lambda: {str(key): 2**key for key in range(min_level, max_level + 1)}
@dataclasses.dataclass
class FPNConfig(hyperparams.Config):
  """A per-FPN-level mapping with an 'all' broadcast key."""

  def get(self):
    """Return the config as a dict, broadcasting the "all" value.

    When the "all" key is present and not None, its value overwrites
    every other per-level entry.
    """
    values = self.as_dict()
    fill = values.get("all")
    if fill is not None:
      for key in values:
        if key != 'all':
          values[key] = fill
    return values
# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  """Config for the plain TF Example data decoder."""
  regenerate_source_id: bool = False
  # presumably remaps COCO's 91 raw class ids onto the contiguous 80-class
  # set — confirm against the decoder implementation.
  coco91_to_80: bool = True
@dataclasses.dataclass
class TfExampleDecoderLabelMap(hyperparams.Config):
  """Config for the TF Example decoder that uses a label map file."""
  regenerate_source_id: bool = False
  # Path to the label map; empty string means unset.
  label_map: str = ''
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  """OneOf selector between the simple and label-map TF Example decoders."""
  type: Optional[str] = 'simple_decoder'
  simple_decoder: TfExampleDecoder = TfExampleDecoder()
  label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
@dataclasses.dataclass
class Mosaic(hyperparams.Config):
  """Mosaic / mixup augmentation parameters.

  A frequency of 0.0 disables the corresponding augmentation.
  """
  mosaic_frequency: float = 0.0
  mixup_frequency: float = 0.0
  mosaic_center: float = 0.2
  # None disables cropping; the YAML configs in this project use
  # 'crop' or 'scale'.
  mosaic_crop_mode: Optional[str] = None
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  jitter: float = 0.0
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Data-parsing and augmentation parameters for the YOLO input pipeline."""
  max_num_instances: int = 200
  letter_box: Optional[bool] = True
  random_flip: bool = True
  # Annotation corrected to bool: this is a flag (was annotated `float`),
  # and the YAML configs set it to True/False.
  random_pad: bool = False
  jitter: float = 0.0
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  aug_rand_saturation: float = 0.0
  aug_rand_brightness: float = 0.0
  aug_rand_hue: float = 0.0
  aug_rand_angle: float = 0.0
  aug_rand_translate: float = 0.0
  aug_rand_perspective: float = 0.0
  use_tie_breaker: bool = True
  best_match_only: bool = False
  anchor_thresh: float = -0.01
  area_thresh: float = 0.1
  mosaic: Mosaic = Mosaic()
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training.

  Note: the original definition declared `global_batch_size` twice
  (64, then 1); only the later declaration took effect, so the
  effective default of 1 is kept here.
  """
  global_batch_size: int = 1
  input_path: str = ''
  # Annotations corrected to Optional[str]: defaults are None.
  tfds_name: Optional[str] = None
  tfds_split: Optional[str] = None
  is_training: bool = True
  dtype: str = 'float16'
  decoder: DataDecoder = DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  tfds_download: bool = True
  cache: bool = False
@dataclasses.dataclass
class YoloHead(hyperparams.Config):
  """Parameterization for the YOLO Head."""
  # presumably enables the "smart bias" initialization scheme for the
  # detection head — confirm against the head implementation.
  smart_bias: bool = True
@dataclasses.dataclass
class YoloDetectionGenerator(hyperparams.Config):
  """Parameters for turning raw predictions into final detections."""
  # Per-level box decoding type; the YAML configs use 'original' or 'scaled'.
  box_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, "original"))
  # Per-level x/y scaling applied when decoding box centers.
  scale_xy: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level stride (2**level) used to map grid cells back to pixels.
  path_scales: FPNConfig = dataclasses.field(
      default_factory=_build_path_scales(MIN_LEVEL, MAX_LEVEL))
  nms_type: str = 'greedy'
  iou_thresh: float = 0.001
  nms_thresh: float = 0.6
  max_boxes: int = 200
  pre_nms_points: int = 5000
@dataclasses.dataclass
class YoloLoss(hyperparams.Config):
  """Config for the Yolo loss; most fields are per-FPN-level dicts."""
  ignore_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  truth_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # IoU-style regression loss variant per level (e.g. 'ciou').
  box_loss_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'ciou'))
  # Per-level weights for the box/class/objectness loss terms.
  iou_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  cls_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  obj_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Gradient clipping delta per level; np.inf means unclipped.
  max_delta: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf))
  objectness_smooth: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  label_smoothing: float = 0.0
  # Scaled-Yolo (v4-csp) loss formulation vs. original darknet loss.
  use_scaled_loss: bool = True
  update_on_repeat: bool = True
@dataclasses.dataclass
class Box(hyperparams.Config):
  """A single anchor box expressed as a list of pixel dimensions.

  Fixes two defects in the original: the missing `@dataclasses.dataclass`
  decorator (the class was never processed as a dataclass), and
  `field(default=list)`, which made the default the `list` type object
  itself rather than a fresh empty list per instance.
  """
  box: List[int] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class AnchorBoxes(hyperparams.Config):
  """Config holding the anchor boxes used across FPN levels."""
  boxes: Optional[List[Box]] = None
  # When set, anchor-free style matching is used with these per-level limits.
  level_limits: Optional[List[int]] = None
  anchors_per_scale: int = 3
  def get(self, min_level, max_level):
    """Splits the flat anchor list into per-level groups.

    Args:
      min_level: Lowest FPN level to generate anchors for.
      max_level: Highest FPN level to generate anchors for.

    Returns:
      A tuple of (dict mapping str(level) -> list of anchor boxes for that
      level, level_limits).
    """
    if self.level_limits is None:
      boxes = [box.box for box in self.boxes]
    else:
      # Anchor-free mode: one dummy unit box per level; side effect below
      # forces a single anchor per scale.
      boxes = [[1.0, 1.0]] * ((max_level - min_level) + 1)
      self.anchors_per_scale = 1
    anchors_per_level = dict()
    start = 0
    for i in range(min_level, max_level + 1):
      anchors_per_level[str(i)] = boxes[start:start + self.anchors_per_scale]
      start += self.anchors_per_scale
    return anchors_per_level, self.level_limits
@dataclasses.dataclass
class Yolo(hyperparams.Config):
  """Config for the full Yolo model: backbone, decoder, head, and loss."""
  # Input image shape as [height, width, channels].
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet(model_id='cspdarknet53'))
  decoder: decoders.Decoder = decoders.Decoder(
      type='yolo_decoder', yolo_decoder=decoders.YoloDecoder(
          version='v4', type='regular'
      )
  )
  head: YoloHead = YoloHead()
  detection_generator: YoloDetectionGenerator = YoloDetectionGenerator()
  loss: YoloLoss = YoloLoss()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='mish',
      use_sync_bn=True,
      norm_momentum=0.99,
      norm_epsilon=0.001)
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = AnchorBoxes()
  # Match the original Darknet implementation's behavior when True.
  darknet_based_model: bool = False
@dataclasses.dataclass
class YoloTask(cfg.TaskConfig):
  """Task config for Yolo detection: model, data, and checkpoint settings."""
  per_category_metrics: bool = False
  # Learning-rate multiplier applied to the detection-head bias variables.
  smart_bias_lr: float = 0.0
  model: Yolo = Yolo()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  weight_decay: float = 0.0
  # Optional COCO annotation file for evaluation.
  annotation_file: Optional[str] = None
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  gradient_clip_norm: float = 0.0
# COCO dataset constants used by the experiment factories below.
COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
# Fixed seed for the training input pipeline, for reproducibility.
GLOBAL_SEED = 1000
@exp_factory.register_config_factory('yolo')
def yolo() -> cfg.ExperimentConfig:
  """Yolo general config."""
  # Require is_training to be explicitly set on both data configs.
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None'
  ]
  return cfg.ExperimentConfig(task=YoloTask(), restrictions=restrictions)
@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4"""
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  validation_interval = 5
  max_num_instances = 200
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint='',
          init_checkpoint_modules='backbone',
          annotation_file=None,
          weight_decay=0.0,
          # Darknet-style model: unscaled loss with update-on-repeat.
          model=Yolo(
              darknet_based_model = True,
              norm_activation=common.NormActivation(use_sync_bn=True),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True)),
          train_data=DataConfig(
              is_training=True,
              global_batch_size=train_batch_size,
              seed=GLOBAL_SEED,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  aug_rand_saturation= 1.5,
                  aug_rand_brightness= 1.5,
                  aug_rand_hue= 0.1,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.213,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_frequency= 0.75,
                      mixup_frequency= 0.0,
                      mosaic_crop_mode= 'crop',
                      mosaic_center= 0.2
                  )
              )),
          validation_data=DataConfig(
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.213,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema':{
                  'average_decay': 0.9998,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.949,
                      'momentum_start': 0.949,
                      'nesterov': True,
                      'warmup_steps': 1000,
                      'weight_decay': 0.0005,
                      'sim_torch': True,
                  }
              },
              # Darknet stepwise schedule, scaled linearly with batch size.
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      'boundaries': [240 * steps_per_epoch, 270*steps_per_epoch],
                      'values': [
                          0.00131 * train_batch_size / 64.0,
                          0.000131 * train_batch_size / 64.0,
                          0.0000131 * train_batch_size / 64.0
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 1000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4"""
  train_batch_size = 128
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300
  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules=None,
          annotation_file=None,
          weight_decay=0.0,
          # Scaled-Yolo model: scaled loss, mish activations.
          model=Yolo(
              darknet_based_model = False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              loss=YoloLoss(use_scaled_loss=True)),
          train_data=DataConfig(
              is_training=True,
              global_batch_size=train_batch_size,
              seed=GLOBAL_SEED,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation = 0.7,
                  aug_rand_brightness = 0.4,
                  aug_rand_hue = 0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  )
              )),
          validation_data=DataConfig(
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema':{
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      # Weight decay scaled linearly with batch size.
                      'weight_decay': 0.0005 * train_batch_size/64.0,
                      'sim_torch': True,
                  }
              },
              # Cosine decay over the full training run.
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
\ No newline at end of file
......@@ -383,7 +383,7 @@ class Darknet(tf.keras.Model):
max_level=5,
width_scale=1.0,
depth_scale=1.0,
use_reorg_input=False,
use_reorg_input = False,
csp_level_mod=(),
activation=None,
use_sync_bn=False,
......@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):
def _build_struct(self, net, inputs):
if self._use_reorg_input:
inputs = nn_blocks.Reorg()(inputs)
net[0].filters = net[1].filters
net[0].output_name = net[1].output_name
del net[1]
endpoints = collections.OrderedDict()
stack_outputs = [inputs]
......@@ -666,7 +669,6 @@ class Darknet(tf.keras.Model):
}
return layer_config
@factory.register_backbone_builder('darknet')
def build_darknet(
input_specs: tf.keras.layers.InputSpec,
......
......@@ -12,11 +12,66 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python3
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
from typing import Mapping, Union
from official.modeling import hyperparams
import tensorflow as tf
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
from official.vision.beta.modeling.decoders import factory
# Model configurations.
# The structure is: model version {v3, v4, ... etc} mapping to
# model config type {regular, tiny, small, large, ... etc}.
# Decoder default hyper-parameters, keyed first by model version ('v3'/'v4')
# and then by model config type. These provide the base values that a user's
# YoloDecoder config may selectively override in build_yolo_decoder.
YOLO_MODELS = {
    "v4":
        dict(
            regular=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                path_process_len=6),
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            csp=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=5,
                fpn_depth=5,
                path_process_len=6),
            csp_large=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=7,
                fpn_depth=7,
                path_process_len=8,
                fpn_filter_scale=2),
        ),
    "v3":
        dict(
            regular=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=None,
                path_process_len=6),
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            spp=dict(
                embed_spp=True,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
        ),
}
@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):
......@@ -487,3 +542,65 @@ class YoloDecoder(tf.keras.Model):
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@factory.register_decoder_builder('yolo_decoder')
def build_yolo_decoder(input_specs: Mapping[str, tf.TensorShape],
                       model_config: hyperparams.Config,
                       l2_regularizer: tf.keras.regularizers.Regularizer = None,
                       **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
  """Builds Yolo FPN/PAN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.
    **kwargs: Additional keyword args; unused, kept for builder-API
      compatibility.

  Returns:
    A `tf.keras.Model` instance of the Yolo FPN/PAN decoder.

  Raises:
    ValueError: If the decoder version is missing/unsupported, or the decoder
      type is unsupported for the chosen version.
  """
  decoder_cfg = model_config.decoder.get()
  norm_activation_config = model_config.norm_activation

  # 'same' is a sentinel meaning "inherit the model-wide activation".
  activation = (
      decoder_cfg.activation
      if decoder_cfg.activation != "same" else
      norm_activation_config.activation)

  if decoder_cfg.version is None:  # custom yolo
    raise ValueError("decoder version cannot be None, specify v3 or v4")

  if decoder_cfg.version not in YOLO_MODELS:
    raise ValueError(
        "unsupported model version please select from {v3, v4}, "
        "or specify a custom decoder config using YoloDecoder in your yaml")

  if decoder_cfg.type is None:
    decoder_cfg.type = "regular"

  if decoder_cfg.type not in YOLO_MODELS[decoder_cfg.version]:
    raise ValueError(
        "unsupported model type please select from "
        f"{YOLO_MODELS[decoder_cfg.version].keys()}, "
        "or specify a custom decoder config using YoloDecoder in your yaml")

  # Copy the registry entry so per-call overrides below do not mutate the
  # module-level YOLO_MODELS defaults for subsequent builds.
  base_model = dict(YOLO_MODELS[decoder_cfg.version][decoder_cfg.type])

  # Apply any explicitly-set user overrides on top of the version defaults.
  cfg_dict = decoder_cfg.as_dict()
  for key in base_model:
    if cfg_dict[key] is not None:
      base_model[key] = cfg_dict[key]

  base_dict = dict(
      activation=activation,
      use_spatial_attention=decoder_cfg.use_spatial_attention,
      use_separable_conv=decoder_cfg.use_separable_conv,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  base_model.update(base_dict)

  model = YoloDecoder(input_specs, **base_model)
  return model
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment