Commit 6a55ecde authored by A. Unique TensorFlower's avatar A. Unique TensorFlower
Browse files

Merge pull request #10286 from PurdueDualityLab:task_pr

PiperOrigin-RevId: 402338060
parents 2d353306 379d64c5
......@@ -73,10 +73,14 @@ connected to a new, more powerful backbone if a person chose to.
| Yolo-v3 spp |
| Yolo-v4 |
| Yolo-v4 tiny |
| Yolo-v4 csp |
| Yolo-v4 large |
## Model Zoo
## Requirements
[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
## Requirements
[![TensorFlow 2.6](https://img.shields.io/badge/TensorFlow-2.6-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.6.0)
[![Python 3.8](https://img.shields.io/badge/Python-3.8-3776AB)](https://www.python.org/downloads/release/python-380/)
......
......@@ -15,7 +15,22 @@
"""All necessary imports for registration."""
# pylint: disable=unused-import
# pylint: disable=g-bad-import-order
from official.common import registry_imports
# import configs
from official.vision.beta.projects.yolo.configs import darknet_classification
from official.vision.beta.projects.yolo.configs import yolo as yolo_config
# import modeling components
from official.vision.beta.projects.yolo.modeling.backbones import darknet
from official.vision.beta.projects.yolo.modeling.decoders import yolo_decoder
# import tasks
from official.vision.beta.projects.yolo.tasks import image_classification
from official.vision.beta.projects.yolo.tasks import yolo as yolo_task
# import optimization packages
from official.vision.beta.projects.yolo.optimization import optimizer_factory
from official.vision.beta.projects.yolo.optimization.configs import optimizer_config
from official.vision.beta.projects.yolo.optimization.configs import optimization_config
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Decoders configurations."""
import dataclasses
from typing import Optional
from official.modeling import hyperparams
from official.vision.beta.configs import decoders
@dataclasses.dataclass
class YoloDecoder(hyperparams.Config):
  """Builds Yolo decoder.

  If the name is specified, or version is specified we ignore input parameters
  and use version and name defaults.
  """
  # Model version key (e.g. 'v3', 'v4'); None means the explicit structure
  # parameters below are used directly.
  version: Optional[str] = None
  # Variant within the version (e.g. 'regular', 'tiny', 'csp', 'large').
  type: Optional[str] = None
  use_fpn: Optional[bool] = None
  use_spatial_attention: bool = False
  use_separable_conv: bool = False
  # NOTE(review): set to integer stack depths (5, 7) by the model presets —
  # annotation is likely meant to be Optional[int]; confirm before changing,
  # since Config may validate overrides against it.
  csp_stack: Optional[bool] = None
  fpn_depth: Optional[int] = None
  fpn_filter_scale: Optional[int] = None
  path_process_len: Optional[int] = None
  max_level_process_len: Optional[int] = None
  # Whether to embed a spatial-pyramid-pooling block.
  embed_spp: Optional[bool] = None
  # NOTE(review): 'same' is padding terminology, not an activation name —
  # presumably a sentinel meaning "inherit the model-wide activation"; confirm.
  activation: Optional[str] = 'same'
@dataclasses.dataclass
class Decoder(decoders.Decoder):
  """OneOf-style decoder config that selects the YOLO decoder by default."""
  type: Optional[str] = 'yolo_decoder'
  # Class-level Config instance as default follows the model-garden config
  # convention used throughout this file.
  yolo_decoder: YoloDecoder = YoloDecoder()
# --experiment_type=scaled_yolo
# mAP 47.6
# Nesting reconstructed to match the YoloTask/Yolo config schema.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
  tpu_enable_xla_dynamic_padder: false
task:
  model:
    input_size: [640, 640, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'altered_cspdarknet53'
        max_level: 5
        min_level: 3
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: csp
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': scaled
      scale_xy:
        'all': 2.0
      max_boxes: 300
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.60
    loss:
      use_scaled_loss: true
      update_on_repeat: true
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.0
      iou_normalizer:
        'all': 0.05
      cls_normalizer:
        'all': 0.3
      # Objectness loss weighted per FPN level.
      object_normalizer:
        '5': 0.28
        '4': 0.70
        '3': 2.80
      objectness_smooth:
        'all': 1.0
    norm_activation:
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 3
      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
              box: [36, 75], box: [76, 55], box: [72, 146],
              box: [142, 110], box: [192, 243], box: [459, 401]]
  train_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    shuffle_buffer_size: 10000
    parser:
      mosaic:
        mosaic_frequency: 1.0
        mixup_frequency: 0.0
        mosaic_crop_mode: 'scale'
        mosaic_center: 0.25
        aug_scale_min: 0.1
        aug_scale_max: 1.9
      max_num_instances: 300
      letter_box: true
      random_flip: true
      aug_rand_translate: 0.1
      area_thresh: 0.1
  validation_data:
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
# --experiment_type=yolo_darknet
# mAP 43.0
# Nesting reconstructed to match the YoloTask/Yolo config schema.
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'bfloat16'
task:
  smart_bias_lr: 0.0
  model:
    darknet_based_model: true
    input_size: [512, 512, 3]
    backbone:
      type: 'darknet'
      darknet:
        model_id: 'cspdarknet53'
        max_level: 5
        min_level: 3
    decoder:
      type: yolo_decoder
      yolo_decoder:
        version: v4
        type: regular
        activation: leaky
    head:
      smart_bias: true
    detection_generator:
      box_type:
        'all': original
      # Per-level grid-sensitivity scaling.
      scale_xy:
        '5': 1.05
        '4': 1.1
        '3': 1.2
      max_boxes: 200
      nms_type: iou
      iou_thresh: 0.001
      nms_thresh: 0.60
    loss:
      use_scaled_loss: false
      box_loss_type:
        'all': ciou
      ignore_thresh:
        'all': 0.7
      iou_normalizer:
        'all': 0.07
      cls_normalizer:
        'all': 1.0
      object_normalizer:
        'all': 1.0
      objectness_smooth:
        'all': 0.0
      max_delta:
        'all': 5.0
    norm_activation:
      activation: mish
      norm_epsilon: 0.0001
      norm_momentum: 0.99
      use_sync_bn: true
    num_classes: 80
    anchor_boxes:
      anchors_per_scale: 3
      boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
              box: [36, 75], box: [76, 55], box: [72, 146],
              box: [142, 110], box: [192, 243], box: [459, 401]]
  train_data:
    global_batch_size: 64
    dtype: float32
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
    is_training: true
    drop_remainder: true
    seed: 1000
    parser:
      # Mosaic-level scale/jitter values differ from the parser-level ones
      # below; they belong to the Mosaic sub-config.
      mosaic:
        mosaic_frequency: 0.75
        mixup_frequency: 0.0
        mosaic_crop_mode: 'crop'
        mosaic_center: 0.2
        aug_scale_min: 0.2
        aug_scale_max: 1.6
        jitter: 0.3
      max_num_instances: 200
      letter_box: false
      random_flip: true
      aug_rand_saturation: 1.5
      aug_rand_brightness: 1.5
      aug_rand_hue: 0.1
      aug_scale_min: 0.1
      aug_scale_max: 1.9
      aug_rand_translate: 0.0
      jitter: 0.3
      area_thresh: 0.1
      random_pad: true
      use_tie_breaker: true
      anchor_thresh: 0.4
  validation_data:
    global_batch_size: 8
    dtype: float32
    input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
    is_training: false
    drop_remainder: true
    parser:
      max_num_instances: 200
      letter_box: false
      use_tie_breaker: true
      anchor_thresh: 0.4
  weight_decay: 0.000
  init_checkpoint: 'gs://tf_model_garden/vision/yolo/ckpt-15000'
  init_checkpoint_modules: 'backbone'
  annotation_file: null
trainer:
  train_steps: 555000
  validation_steps: 625
  steps_per_loop: 1850
  summary_interval: 1850
  validation_interval: 9250
  checkpoint_interval: 1850
  optimizer_config:
    ema:
      average_decay: 0.9998
      trainable_weights_only: false
      dynamic_decay: true
    learning_rate:
      type: stepwise
      stepwise:
        boundaries: [400000]
        name: PiecewiseConstantDecay
        values: [0.00131, 0.000131]
    optimizer:
      type: sgd_torch
      sgd_torch:
        momentum: 0.949
        momentum_start: 0.949
        nesterov: true
        warmup_steps: 1000
        weight_decay: 0.0005
        name: SGD
    warmup:
      type: 'linear'
      linear:
        warmup_steps: 1000 # learning rate rises from 0 to 0.0013 over 1000 steps
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YOLO configuration definition."""
import dataclasses
import os
from typing import Any, List, Optional, Union
import numpy as np
from official.core import config_definitions as cfg
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.configs import common
from official.vision.beta.projects.yolo import optimization
from official.vision.beta.projects.yolo.configs import backbones
from official.vision.beta.projects.yolo.configs import decoders
# pytype: disable=annotation-type-mismatch

# Inclusive range of FPN output levels a YOLO model may emit.
MIN_LEVEL = 1
MAX_LEVEL = 7
# Default deterministic seed used by the task configs.
GLOBAL_SEED = 1000
def _build_dict(min_level, max_level, value):
vals = {str(key): value for key in range(min_level, max_level + 1)}
vals['all'] = None
return lambda: vals
def _build_path_scales(min_level, max_level):
return lambda: {str(key): 2**key for key in range(min_level, max_level + 1)}
@dataclasses.dataclass
class FPNConfig(hyperparams.Config):
  """FPN config holding one value per level plus an optional 'all' override."""
  all: Optional[Any] = None

  def get(self):
    """Allow for a key for each level or a single key for all the levels."""
    values = self.as_dict()
    override = values.get('all')
    if override is None:
      return values
    # A non-None 'all' wins over every per-level entry.
    return {key: (val if key == 'all' else override)
            for key, val in values.items()}
# pylint: disable=missing-class-docstring
@dataclasses.dataclass
class TfExampleDecoder(hyperparams.Config):
  """Config for the plain TF Example decoder."""
  regenerate_source_id: bool = False
  # Remap the sparse 91-id COCO label space onto the contiguous 80-class set.
  coco91_to_80: bool = True
@dataclasses.dataclass
class TfExampleDecoderLabelMap(hyperparams.Config):
  """Config for a TF Example decoder driven by an external label map file."""
  regenerate_source_id: bool = False
  # Path to the label map; empty string means unset.
  label_map: str = ''
@dataclasses.dataclass
class DataDecoder(hyperparams.OneOfConfig):
  """OneOf selector between the simple decoder and the label-map decoder."""
  type: Optional[str] = 'simple_decoder'
  simple_decoder: TfExampleDecoder = TfExampleDecoder()
  label_map_decoder: TfExampleDecoderLabelMap = TfExampleDecoderLabelMap()
@dataclasses.dataclass
class Mosaic(hyperparams.Config):
  """Config for mosaic (4-image) and mixup augmentation."""
  # Frequency with which mosaic / mixup are applied (0.0 disables).
  mosaic_frequency: float = 0.0
  mixup_frequency: float = 0.0
  mosaic_center: float = 0.2
  # Crop mode for assembling the mosaic ('crop', 'scale'); None disables.
  mosaic_crop_mode: Optional[str] = None
  # Random scale range applied within the mosaic op (distinct from the
  # parser-level aug_scale_* values).
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  jitter: float = 0.0
@dataclasses.dataclass
class Parser(hyperparams.Config):
  """Config for the YOLO data parser / augmentation pipeline."""
  # Maximum number of boxes kept per image.
  max_num_instances: int = 200
  # Preserve aspect ratio (letterbox pad) during resize.
  letter_box: Optional[bool] = True
  random_flip: bool = True
  # Whether to pad for random translation (annotation corrected from `float`:
  # the default and all config overrides are boolean).
  random_pad: bool = False
  jitter: float = 0.0
  aug_scale_min: float = 1.0
  aug_scale_max: float = 1.0
  # Color jitter maxima: a value v scales the property between 1/v and v.
  aug_rand_saturation: float = 0.0
  aug_rand_brightness: float = 0.0
  aug_rand_hue: float = 0.0
  aug_rand_angle: float = 0.0
  aug_rand_translate: float = 0.0
  aug_rand_perspective: float = 0.0
  use_tie_breaker: bool = True
  best_match_only: bool = False
  anchor_thresh: float = -0.01
  # Minimum relative box area kept after augmentation.
  area_thresh: float = 0.1
  mosaic: Mosaic = Mosaic()
@dataclasses.dataclass
class DataConfig(cfg.DataConfig):
  """Input config for training."""
  # NOTE: the original declared `global_batch_size` twice (64, then 1); in a
  # dataclass the later declaration wins, so the effective default of 1 is
  # kept here as the single declaration. The experiment factories and YAML
  # configs always override it explicitly.
  global_batch_size: int = 1
  input_path: str = ''
  tfds_name: str = ''
  tfds_split: str = ''
  is_training: bool = True
  dtype: str = 'float16'
  decoder: DataDecoder = DataDecoder()
  parser: Parser = Parser()
  shuffle_buffer_size: int = 10000
  tfds_download: bool = True
  cache: bool = False
  drop_remainder: bool = True
@dataclasses.dataclass
class YoloHead(hyperparams.Config):
  """Parameterization for the YOLO Head."""
  # Whether to use the smart-bias initialization for the detection biases.
  smart_bias: bool = True
@dataclasses.dataclass
class YoloDetectionGenerator(hyperparams.Config):
  """Config for turning raw model output into final detections."""
  # Per-level box decode style ('original' darknet vs 'scaled' yolo).
  box_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'original'))
  # Per-level scaling of the x,y predictions.
  scale_xy: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level output stride (2**level).
  path_scales: FPNConfig = dataclasses.field(
      default_factory=_build_path_scales(MIN_LEVEL, MAX_LEVEL))
  nms_type: str = 'greedy'
  # NOTE(review): despite the name, 0.001 suggests a confidence/score
  # threshold rather than an IoU — confirm against the detection generator.
  iou_thresh: float = 0.001
  # IoU overlap threshold used during NMS.
  nms_thresh: float = 0.6
  # Maximum detections returned per image.
  max_boxes: int = 200
  pre_nms_points: int = 5000
@dataclasses.dataclass
class YoloLoss(hyperparams.Config):
  """Config for the YOLO loss, with per-FPN-level weightings."""
  ignore_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  truth_thresh: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level IoU variant for the box loss (e.g. 'ciou').
  box_loss_type: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 'ciou'))
  # Per-level scale on the box/IoU loss.
  iou_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level scale on the classification loss.
  cls_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level scale on the detection-map (objectness) loss.
  object_normalizer: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 1.0))
  # Per-level clipping value; np.inf leaves the loss unclipped.
  max_delta: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, np.inf))
  # Per-level smoothing applied to the detection map.
  objectness_smooth: FPNConfig = dataclasses.field(
      default_factory=_build_dict(MIN_LEVEL, MAX_LEVEL, 0.0))
  # Label smoothing on the class loss.
  label_smoothing: float = 0.0
  # Selects the scaled-YOLO loss path instead of the original darknet path.
  use_scaled_loss: bool = True
  update_on_repeat: bool = True
@dataclasses.dataclass
class Box(hyperparams.Config):
  """A single anchor box as a [width, height] pair."""
  # Bug fixed: the original used `dataclasses.field(default=list)`, which
  # stores the `list` *class object* itself as the default value. A mutable
  # default must be supplied through `default_factory`, which produces a
  # fresh empty list per instance.
  box: List[int] = dataclasses.field(default_factory=list)
@dataclasses.dataclass
class AnchorBoxes(hyperparams.Config):
  """The full set of anchor boxes shared across FPN levels."""
  boxes: Optional[List[Box]] = None
  # When set, the model runs anchor-free: per-level box size limits.
  level_limits: Optional[List[int]] = None
  anchors_per_scale: int = 3

  def get(self, min_level, max_level):
    """Distribute them in order to each level.

    Args:
      min_level: `int` the lowest output level.
      max_level: `int` the highest output level.

    Returns:
      anchors_per_level: A `Dict[List[int]]` of the anchor boxes for each level.
      self.level_limits: A `List[int]` of the box size limits to link to each
        level under anchor free conditions.
    """
    if self.level_limits is None:
      # Anchor-based mode: use the configured boxes directly.
      boxes = [box.box for box in self.boxes]
    else:
      # Anchor-free mode: one dummy unit box per level.
      boxes = [[1.0, 1.0]] * ((max_level - min_level) + 1)
      # NOTE: mutates this config so later readers see one anchor per scale.
      self.anchors_per_scale = 1
    anchors_per_level = dict()
    start = 0
    # Hand out `anchors_per_scale` consecutive boxes to each level in order.
    for i in range(min_level, max_level + 1):
      anchors_per_level[str(i)] = boxes[start:start + self.anchors_per_scale]
      start += self.anchors_per_scale
    return anchors_per_level, self.level_limits
@dataclasses.dataclass
class Yolo(hyperparams.Config):
  """Top-level YOLO model config (backbone, decoder, head, loss, anchors)."""
  input_size: Optional[List[int]] = dataclasses.field(
      default_factory=lambda: [512, 512, 3])
  backbone: backbones.Backbone = backbones.Backbone(
      type='darknet', darknet=backbones.Darknet(model_id='cspdarknet53'))
  decoder: decoders.Decoder = decoders.Decoder(
      type='yolo_decoder',
      yolo_decoder=decoders.YoloDecoder(version='v4', type='regular'))
  head: YoloHead = YoloHead()
  detection_generator: YoloDetectionGenerator = YoloDetectionGenerator()
  loss: YoloLoss = YoloLoss()
  norm_activation: common.NormActivation = common.NormActivation(
      activation='mish',
      use_sync_bn=True,
      norm_momentum=0.99,
      norm_epsilon=0.001)
  num_classes: int = 80
  anchor_boxes: AnchorBoxes = AnchorBoxes()
  # Selects darknet-style behavior (box decoding, loss path) over scaled-yolo.
  darknet_based_model: bool = False
@dataclasses.dataclass
class YoloTask(cfg.TaskConfig):
  """YOLO detection task config: model, data, and checkpointing."""
  per_category_metrics: bool = False
  smart_bias_lr: float = 0.0
  model: Yolo = Yolo()
  train_data: DataConfig = DataConfig(is_training=True)
  validation_data: DataConfig = DataConfig(is_training=False)
  weight_decay: float = 0.0
  # COCO annotation file for evaluation; None builds ground truth on the fly.
  annotation_file: Optional[str] = None
  init_checkpoint: Optional[str] = None
  init_checkpoint_modules: Union[
      str, List[str]] = 'all'  # all, backbone, and/or decoder
  gradient_clip_norm: float = 0.0
  # NOTE(review): no type annotation, so this is a plain class attribute and
  # NOT a dataclass/config field — it cannot be overridden via config. If it
  # should be configurable, annotate as `seed: int = GLOBAL_SEED`; confirm.
  seed = GLOBAL_SEED
# COCO dataset defaults used by the experiment factories below.
COCO_INPUT_PATH_BASE = 'coco'
COCO_TRAIN_EXAMPLES = 118287
COCO_VAL_EXAMPLES = 5000
@exp_factory.register_config_factory('yolo')
def yolo() -> cfg.ExperimentConfig:
  """Yolo general config."""
  restrictions = [
      'task.train_data.is_training != None',
      'task.validation_data.is_training != None',
  ]
  return cfg.ExperimentConfig(task=YoloTask(), restrictions=restrictions)
@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4.

  Returns:
    A `cfg.ExperimentConfig` mirroring the darknet training recipe
    (original loss path, stepwise LR schedule with EMA).
  """
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  validation_interval = 5
  max_num_instances = 200

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint='',
          init_checkpoint_modules='backbone',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=True,
              norm_activation=common.NormActivation(use_sync_bn=True),
              head=YoloHead(smart_bias=True),
              # Original (non-scaled) darknet loss formulation.
              loss=YoloLoss(use_scaled_loss=False, update_on_repeat=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  aug_rand_saturation=1.5,
                  aug_rand_brightness=1.5,
                  aug_rand_hue=0.1,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.4,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_frequency=0.75,
                      mixup_frequency=0.0,
                      mosaic_crop_mode='crop',
                      mosaic_center=0.2))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=False,
                  use_tie_breaker=True,
                  best_match_only=False,
                  anchor_thresh=0.4,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9998,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.949,
                      'momentum_start': 0.949,
                      'nesterov': True,
                      'warmup_steps': 1000,
                      'weight_decay': 0.0005,
                  }
              },
              'learning_rate': {
                  'type': 'stepwise',
                  'stepwise': {
                      # One LR drop at epoch 240; base LR scales linearly
                      # with the batch size relative to 64.
                      'boundaries': [
                          240 * steps_per_epoch
                      ],
                      'values': [
                          0.00131 * train_batch_size / 64.0,
                          0.000131 * train_batch_size / 64.0,
                      ]
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': 1000,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4.

  Returns:
    A `cfg.ExperimentConfig` mirroring the scaled-YOLO training recipe
    (scaled loss path, cosine LR schedule with epoch-based warmup).
  """
  train_batch_size = 64
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
  validation_interval = 5
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
  max_num_instances = 300

  config = cfg.ExperimentConfig(
      runtime=cfg.RuntimeConfig(mixed_precision_dtype='bfloat16'),
      task=YoloTask(
          smart_bias_lr=0.1,
          init_checkpoint_modules='',
          annotation_file=None,
          weight_decay=0.0,
          model=Yolo(
              darknet_based_model=False,
              norm_activation=common.NormActivation(
                  activation='mish',
                  use_sync_bn=True,
                  norm_epsilon=0.0001,
                  norm_momentum=0.97),
              head=YoloHead(smart_bias=True),
              # Scaled-YOLO loss formulation.
              loss=YoloLoss(use_scaled_loss=True),
              anchor_boxes=AnchorBoxes(
                  anchors_per_scale=3,
                  boxes=[
                      Box(box=[12, 16]),
                      Box(box=[19, 36]),
                      Box(box=[40, 28]),
                      Box(box=[36, 75]),
                      Box(box=[76, 55]),
                      Box(box=[72, 146]),
                      Box(box=[142, 110]),
                      Box(box=[192, 243]),
                      Box(box=[459, 401])
                  ])),
          train_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'train*'),
              is_training=True,
              global_batch_size=train_batch_size,
              dtype='float32',
              parser=Parser(
                  aug_rand_saturation=0.7,
                  aug_rand_brightness=0.4,
                  aug_rand_hue=0.015,
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  random_pad=False,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
                  mosaic=Mosaic(
                      mosaic_crop_mode='scale',
                      mosaic_frequency=1.0,
                      mixup_frequency=0.0,
                  ))),
          validation_data=DataConfig(
              input_path=os.path.join(COCO_INPUT_PATH_BASE, 'val*'),
              is_training=False,
              global_batch_size=eval_batch_size,
              drop_remainder=True,
              dtype='float32',
              parser=Parser(
                  letter_box=True,
                  use_tie_breaker=True,
                  best_match_only=True,
                  anchor_thresh=4.0,
                  area_thresh=0.1,
                  max_num_instances=max_num_instances,
              ))),
      trainer=cfg.TrainerConfig(
          train_steps=train_epochs * steps_per_epoch,
          validation_steps=COCO_VAL_EXAMPLES // eval_batch_size,
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
          checkpoint_interval=steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
                  'trainable_weights_only': False,
                  'dynamic_decay': True,
              },
              'optimizer': {
                  'type': 'sgd_torch',
                  'sgd_torch': {
                      'momentum': 0.937,
                      # Momentum ramps from this value during warmup.
                      'momentum_start': 0.8,
                      'nesterov': True,
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      # Weight decay scales linearly with batch size.
                      'weight_decay': 0.0005 * train_batch_size / 64.0,
                  }
              },
              'learning_rate': {
                  'type': 'cosine',
                  'cosine': {
                      'initial_learning_rate': 0.01,
                      'alpha': 0.2,
                      'decay_steps': train_epochs * steps_per_epoch,
                  }
              },
              'warmup': {
                  'type': 'linear',
                  'linear': {
                      'warmup_steps': steps_per_epoch * warmup_epochs,
                      'warmup_learning_rate': 0
                  }
              }
          })),
      restrictions=[
          'task.train_data.is_training != None',
          'task.validation_data.is_training != None'
      ])
  return config
......@@ -75,11 +75,11 @@ class Parser(parser.Parser):
saturation. saturation will be scaled between 1/value and value.
aug_rand_brightness: `float` indicating the maximum scaling value for
brightness. brightness will be scaled between 1/value and value.
letter_box: `boolean` indicating whether upon start of the datapipeline
letter_box: `boolean` indicating whether upon start of the data pipeline
regardless of the preprocessing ops that are used, the aspect ratio of
the images should be preserved.
random_pad: `bool` indiccating wether to use padding to apply random
translation true for darknet yolo false for scaled yolo.
translation, true for darknet yolo false for scaled yolo.
random_flip: `boolean` indicating whether or not to randomly flip the
image horizontally.
jitter: `float` for the maximum change in aspect ratio expected in each
......@@ -147,6 +147,7 @@ class Parser(parser.Parser):
# Set the per level values needed for operation
self._darknet = darknet
self._area_thresh = area_thresh
self._level_limits = level_limits
self._seed = seed
self._dtype = dtype
......@@ -259,7 +260,7 @@ class Parser(parser.Parser):
self._aug_rand_saturation,
self._aug_rand_brightness,
seed=self._seed,
darknet=self._darknet)
darknet=self._darknet or self._level_limits is not None)
# Cast the image to the selcted datatype.
image, labels = self._build_label(
......
......@@ -40,7 +40,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
loss_type='ciou',
iou_normalizer=1.0,
cls_normalizer=1.0,
obj_normalizer=1.0,
object_normalizer=1.0,
label_smoothing=0.0,
objectness_smooth=True,
update_on_repeat=False,
......@@ -65,7 +65,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
iou_normalizer: `float` for how much to scale the loss on the IOU or the
boxes.
cls_normalizer: `float` for how much to scale the loss on the classes.
obj_normalizer: `float` for how much to scale loss on the detection map.
object_normalizer: `float` for how much to scale loss on the detection
map.
label_smoothing: `float` for how much to smooth the loss on the classes.
objectness_smooth: `float` for how much to smooth the loss on the
detection map.
......@@ -90,7 +91,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
self._iou_normalizer = iou_normalizer
self._cls_normalizer = cls_normalizer
self._obj_normalizer = obj_normalizer
self._object_normalizer = object_normalizer
self._scale_x_y = scale_x_y
self._max_delta = max_delta
......@@ -240,9 +241,14 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
Returns:
loss: `tf.float` scalar for the scaled loss.
scale: `tf.float` how much the loss was scaled by.
"""
del box_loss, conf_loss, class_loss, ground_truths, predictions
return loss
del box_loss
del conf_loss
del class_loss
del ground_truths
del predictions
return loss, tf.ones_like(loss)
@abc.abstractmethod
def cross_replica_aggregation(self, loss, num_replicas_in_sync):
......@@ -349,16 +355,16 @@ class DarknetLoss(YoloLossBase):
tf.cast(true_class, tf.int32),
depth=tf.shape(pred_class)[-1],
dtype=pred_class.dtype)
true_classes = tf.stop_gradient(loss_utils.apply_mask(ind_mask, true_class))
true_class = tf.stop_gradient(loss_utils.apply_mask(ind_mask, true_class))
# Reorganize the one hot class list as a grid.
true_class = loss_utils.build_grid(
inds, true_classes, pred_class, ind_mask, update=False)
true_class = tf.stop_gradient(true_class)
true_class_grid = loss_utils.build_grid(
inds, true_class, pred_class, ind_mask, update=False)
true_class_grid = tf.stop_gradient(true_class_grid)
# Use the class mask to find the number of objects located in
# each predicted grid cell/pixel.
counts = true_class
counts = true_class_grid
counts = tf.reduce_sum(counts, axis=-1, keepdims=True)
reps = tf.gather_nd(counts, inds, batch_dims=1)
reps = tf.squeeze(reps, axis=-1)
......@@ -372,26 +378,50 @@ class DarknetLoss(YoloLossBase):
box_loss = math_ops.divide_no_nan(box_loss, reps)
box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)
# Compute the sigmoid binary cross entropy for the class maps.
class_loss = tf.reduce_mean(
loss_utils.sigmoid_bce(
tf.expand_dims(true_class, axis=-1),
tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
axis=-1)
# Apply normalization to the class losses.
if self._cls_normalizer < 1.0:
# Build a mask based on the true class locations.
cls_norm_mask = true_class
# Apply the classes weight to class indexes were one_hot is one.
class_loss *= ((1 - cls_norm_mask) + cls_norm_mask * self._cls_normalizer)
# Mask to the class loss and compute the sum over all the objects.
class_loss = tf.reduce_sum(class_loss, axis=-1)
class_loss = loss_utils.apply_mask(grid_mask, class_loss)
class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
class_loss = tf.cast(
tf.reduce_sum(class_loss, axis=(1, 2, 3)), dtype=y_pred.dtype)
if self._update_on_repeat:
# Converts list of gound truths into a grid where repeated values
# are replaced by the most recent value. So some class identities may
# get lost but the loss computation will be more stable. Results are
# more consistent.
# Compute the sigmoid binary cross entropy for the class maps.
class_loss = tf.reduce_mean(
loss_utils.sigmoid_bce(
tf.expand_dims(true_class_grid, axis=-1),
tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
axis=-1)
# Apply normalization to the class losses.
if self._cls_normalizer < 1.0:
# Build a mask based on the true class locations.
cls_norm_mask = true_class_grid
# Apply the classes weight to class indexes were one_hot is one.
class_loss *= ((1 - cls_norm_mask) +
cls_norm_mask * self._cls_normalizer)
# Mask to the class loss and compute the sum over all the objects.
class_loss = tf.reduce_sum(class_loss, axis=-1)
class_loss = loss_utils.apply_mask(grid_mask, class_loss)
class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
class_loss = tf.cast(
tf.reduce_sum(class_loss, axis=(1, 2, 3)), dtype=y_pred.dtype)
else:
# Computes the loss while keeping the structure as a list in
# order to ensure all objects are considered. In some cases can
# make training more unstable but may also return higher APs.
pred_class = loss_utils.apply_mask(
ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))
class_loss = tf.keras.losses.binary_crossentropy(
tf.expand_dims(true_class, axis=-1),
tf.expand_dims(pred_class, axis=-1),
label_smoothing=self._label_smoothing,
from_logits=True)
class_loss = loss_utils.apply_mask(ind_mask, class_loss)
class_loss = math_ops.divide_no_nan(class_loss,
tf.expand_dims(reps, axis=-1))
class_loss = tf.cast(
tf.reduce_sum(class_loss, axis=(1, 2)), dtype=y_pred.dtype)
class_loss *= self._cls_normalizer
# Compute the sigmoid binary cross entropy for the confidence maps.
bce = tf.reduce_mean(
......@@ -406,7 +436,7 @@ class DarknetLoss(YoloLossBase):
# Apply the weights to each loss.
box_loss *= self._iou_normalizer
conf_loss *= self._obj_normalizer
conf_loss *= self._object_normalizer
# Add all the losses together then take the mean over the batches.
loss = box_loss + class_loss + conf_loss
......@@ -547,7 +577,7 @@ class ScaledLoss(YoloLossBase):
# Apply the weights to each loss.
box_loss *= self._iou_normalizer
class_loss *= self._cls_normalizer
conf_loss *= self._obj_normalizer
conf_loss *= self._object_normalizer
# Add all the losses together then take the sum over the batches.
mean_loss = box_loss + class_loss + conf_loss
......@@ -575,12 +605,13 @@ class ScaledLoss(YoloLossBase):
predictions: `Dict` holding all the predicted values.
Returns:
loss: `tf.float` scalar for the scaled loss.
scale: `tf.float` how much the loss was scaled by.
"""
scale = tf.stop_gradient(3 / len(list(predictions.keys())))
return loss * scale
return loss * scale, 1 / scale
def cross_replica_aggregation(self, loss, num_replicas_in_sync):
"""this method is not specific to each loss path, but each loss type."""
"""This method is not specific to each loss path, but each loss type."""
return loss
......@@ -597,7 +628,7 @@ class YoloLoss:
loss_types=None,
iou_normalizers=None,
cls_normalizers=None,
obj_normalizers=None,
object_normalizers=None,
objectness_smooths=None,
box_types=None,
scale_xys=None,
......@@ -627,8 +658,8 @@ class YoloLoss:
or the boxes for each FPN path.
cls_normalizers: `Dict[float]` for how much to scale the loss on the
classes for each FPN path.
obj_normalizers: `Dict[float]` for how much to scale loss on the detection
map for each FPN path.
object_normalizers: `Dict[float]` for how much to scale loss on the
detection map for each FPN path.
objectness_smooths: `Dict[float]` for how much to smooth the loss on the
detection map for each FPN path.
box_types: `Dict[bool]` for which scaling type to use for each FPN path.
......@@ -666,7 +697,7 @@ class YoloLoss:
loss_type=loss_types[key],
iou_normalizer=iou_normalizers[key],
cls_normalizer=cls_normalizers[key],
obj_normalizer=obj_normalizers[key],
object_normalizer=object_normalizers[key],
box_type=box_types[key],
objectness_smooth=objectness_smooths[key],
max_delta=max_deltas[key],
......@@ -695,10 +726,8 @@ class YoloLoss:
# after computing the loss, scale loss as needed for aggregation
# across FPN levels
loss = self._loss_dict[key].post_path_aggregation(loss, loss_box,
loss_conf, loss_class,
ground_truth,
predictions)
loss, scale = self._loss_dict[key].post_path_aggregation(
loss, loss_box, loss_conf, loss_class, ground_truth, predictions)
# after completing the scaling of the loss on each replica, handle
# scaling the loss for mergeing the loss across replicas
......@@ -708,12 +737,13 @@ class YoloLoss:
# detach all the below gradients: none of them should make a
# contribution to the gradient form this point forwards
metric_loss += tf.stop_gradient(mean_loss)
metric_dict[key]['loss'] = tf.stop_gradient(mean_loss)
metric_loss += tf.stop_gradient(mean_loss / scale)
metric_dict[key]['loss'] = tf.stop_gradient(mean_loss / scale)
metric_dict[key]['avg_iou'] = tf.stop_gradient(avg_iou)
metric_dict[key]['avg_obj'] = tf.stop_gradient(avg_obj)
metric_dict['net']['box'] += tf.stop_gradient(loss_box)
metric_dict['net']['class'] += tf.stop_gradient(loss_class)
metric_dict['net']['conf'] += tf.stop_gradient(loss_conf)
metric_dict['net']['box'] += tf.stop_gradient(loss_box / scale)
metric_dict['net']['class'] += tf.stop_gradient(loss_class / scale)
metric_dict['net']['conf'] += tf.stop_gradient(loss_conf / scale)
return loss_val, metric_loss, metric_dict
......@@ -60,7 +60,7 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
loss_types={key: 'ciou' for key in keys},
iou_normalizers={key: 0.05 for key in keys},
cls_normalizers={key: 0.5 for key in keys},
obj_normalizers={key: 1.0 for key in keys},
object_normalizers={key: 1.0 for key in keys},
objectness_smooths={key: 1.0 for key in keys},
box_types={key: 'scaled' for key in keys},
scale_xys={key: 2.0 for key in keys},
......
......@@ -454,6 +454,9 @@ class Darknet(tf.keras.Model):
def _build_struct(self, net, inputs):
if self._use_reorg_input:
inputs = nn_blocks.Reorg()(inputs)
net[0].filters = net[1].filters
net[0].output_name = net[1].output_name
del net[1]
endpoints = collections.OrderedDict()
stack_outputs = [inputs]
......
......@@ -13,10 +13,66 @@
# limitations under the License.
"""Feature Pyramid Network and Path Aggregation variants used in YOLO."""
from typing import Mapping, Union, Optional
import tensorflow as tf
from official.modeling import hyperparams
from official.vision.beta.modeling.decoders import factory
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
# model configurations
# the structure is as follows. model version, {v3, v4, v#, ... etc}
# the model config type {regular, tiny, small, large, ... etc}
# Default decoder structure parameters for each stock Yolo release, keyed by
# model version ('v3', 'v4') and then by model type ('regular', 'tiny', ...).
YOLO_MODELS = {
    'v4':
        dict(
            # Standard Yolo-v4 decoder.
            regular=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                path_process_len=6),
            # Reduced decoder used by Yolo-v4 tiny.
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            # CSP decoder variant.
            csp=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=5,
                fpn_depth=5,
                path_process_len=6),
            # Larger CSP decoder variant with wider FPN filters.
            csp_large=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=7,
                fpn_depth=7,
                path_process_len=8,
                fpn_filter_scale=2),
        ),
    'v3':
        dict(
            # Standard Yolo-v3 decoder.
            regular=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=None,
                path_process_len=6),
            # Reduced decoder used by Yolo-v3 tiny.
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            # Yolo-v3 with spatial pyramid pooling embedded.
            spp=dict(
                embed_spp=True,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
        ),
}
@tf.keras.utils.register_keras_serializable(package='yolo')
class _IdentityRoute(tf.keras.layers.Layer):
......@@ -487,3 +543,66 @@ class YoloDecoder(tf.keras.Model):
@classmethod
def from_config(cls, config, custom_objects=None):
return cls(**config)
@factory.register_decoder_builder('yolo_decoder')
def build_yolo_decoder(
    input_specs: Mapping[str, tf.TensorShape],
    model_config: hyperparams.Config,
    l2_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
    **kwargs) -> Union[None, tf.keras.Model, tf.keras.layers.Layer]:
  """Builds Yolo FPN/PAN decoder from a config.

  Args:
    input_specs: A `dict` of input specifications. A dictionary consists of
      {level: TensorShape} from a backbone.
    model_config: A OneOfConfig. Model config.
    l2_regularizer: A `tf.keras.regularizers.Regularizer` instance. Default to
      None.
    **kwargs: Additional kwargs arguments.

  Returns:
    A `tf.keras.Model` instance of the Yolo FPN/PAN decoder.

  Raises:
    ValueError: If the decoder version is unset/unsupported, or the decoder
      type is not available for the selected version.
  """
  decoder_cfg = model_config.decoder.get()
  norm_activation_config = model_config.norm_activation

  # 'same' means "inherit the activation from the norm_activation config".
  activation = (
      decoder_cfg.activation if decoder_cfg.activation != 'same' else
      norm_activation_config.activation)

  if decoder_cfg.version is None:  # custom yolo
    raise ValueError('Decoder version cannot be None, specify v3 or v4.')
  if decoder_cfg.version not in YOLO_MODELS:
    raise ValueError(
        'Unsupported model version please select from {v3, v4}, '
        'or specify a custom decoder config using YoloDecoder in your yaml.')

  if decoder_cfg.type is None:
    decoder_cfg.type = 'regular'
  if decoder_cfg.type not in YOLO_MODELS[decoder_cfg.version]:
    raise ValueError(
        'Unsupported model type please select from '
        f'{YOLO_MODELS[decoder_cfg.version].keys()} '
        'or specify a custom decoder config using YoloDecoder.')

  # Copy the defaults so the per-call overrides below do not mutate the shared
  # module-level YOLO_MODELS table across successive builder invocations.
  base_model = dict(YOLO_MODELS[decoder_cfg.version][decoder_cfg.type])

  # Explicit config values take precedence over the stock model defaults.
  cfg_dict = decoder_cfg.as_dict()
  for key in base_model:
    if cfg_dict[key] is not None:
      base_model[key] = cfg_dict[key]

  base_dict = dict(
      activation=activation,
      use_spatial_attention=decoder_cfg.use_spatial_attention,
      use_separable_conv=decoder_cfg.use_separable_conv,
      use_sync_bn=norm_activation_config.use_sync_bn,
      norm_momentum=norm_activation_config.norm_momentum,
      norm_epsilon=norm_activation_config.norm_epsilon,
      kernel_regularizer=l2_regularizer)
  base_model.update(base_dict)

  model = YoloDecoder(input_specs, **base_model, **kwargs)
  return model
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common factory functions yolo neural networks."""
from absl import logging
from official.vision.beta.modeling.backbones import factory as backbone_factory
from official.vision.beta.modeling.decoders import factory as decoder_factory
from official.vision.beta.projects.yolo.configs import yolo
from official.vision.beta.projects.yolo.modeling import yolo_model
from official.vision.beta.projects.yolo.modeling.heads import yolo_head
from official.vision.beta.projects.yolo.modeling.layers import detection_generator
def build_yolo_detection_generator(model_config: yolo.Yolo, anchor_boxes):
  """Builds yolo detection generator.

  Args:
    model_config: A yolo.Yolo config describing the detection generator and
      loss parameters.
    anchor_boxes: The anchor boxes to decode predictions against.

  Returns:
    A `detection_generator.YoloLayer` instance.
  """
  gen_cfg = model_config.detection_generator
  loss_cfg = model_config.loss
  return detection_generator.YoloLayer(
      classes=model_config.num_classes,
      anchors=anchor_boxes,
      iou_thresh=gen_cfg.iou_thresh,
      nms_thresh=gen_cfg.nms_thresh,
      max_boxes=gen_cfg.max_boxes,
      pre_nms_points=gen_cfg.pre_nms_points,
      nms_type=gen_cfg.nms_type,
      box_type=gen_cfg.box_type.get(),
      path_scale=gen_cfg.path_scales.get(),
      scale_xy=gen_cfg.scale_xy.get(),
      label_smoothing=loss_cfg.label_smoothing,
      use_scaled_loss=loss_cfg.use_scaled_loss,
      update_on_repeat=loss_cfg.update_on_repeat,
      truth_thresh=loss_cfg.truth_thresh.get(),
      loss_type=loss_cfg.box_loss_type.get(),
      max_delta=loss_cfg.max_delta.get(),
      iou_normalizer=loss_cfg.iou_normalizer.get(),
      cls_normalizer=loss_cfg.cls_normalizer.get(),
      object_normalizer=loss_cfg.object_normalizer.get(),
      ignore_thresh=loss_cfg.ignore_thresh.get(),
      objectness_smooth=loss_cfg.objectness_smooth.get())
def build_yolo_head(input_specs, model_config: yolo.Yolo, l2_regularization):
  """Builds yolo head.

  Args:
    input_specs: A `dict` of {level: TensorShape} produced by the decoder;
      the head covers the min..max of these levels.
    model_config: A yolo.Yolo config with head and normalization parameters.
    l2_regularization: A `tf.keras.regularizers.Regularizer` or None.

  Returns:
    A `yolo_head.YoloHead` instance.
  """
  levels = [int(level) for level in input_specs.keys()]
  return yolo_head.YoloHead(
      min_level=min(levels),
      max_level=max(levels),
      classes=model_config.num_classes,
      boxes_per_level=model_config.anchor_boxes.anchors_per_scale,
      norm_momentum=model_config.norm_activation.norm_momentum,
      norm_epsilon=model_config.norm_activation.norm_epsilon,
      kernel_regularizer=l2_regularization,
      smart_bias=model_config.head.smart_bias)
def build_yolo(input_specs, model_config, l2_regularization):
  """Builds yolo model.

  Args:
    input_specs: Input layer spec used to build and summarize the model.
    model_config: A yolo.Yolo model config.
    l2_regularization: A `tf.keras.regularizers.Regularizer` or None.

  Returns:
    A tuple of (the built `yolo_model.Yolo` model, its loss objects).
  """
  backbone_cfg = model_config.backbone.get()
  # Anchor boxes are resolved against the backbone's level range before the
  # backbone model itself is instantiated.
  anchor_dict, _ = model_config.anchor_boxes.get(backbone_cfg.min_level,
                                                 backbone_cfg.max_level)

  backbone = backbone_factory.build_backbone(input_specs, model_config.backbone,
                                             model_config.norm_activation,
                                             l2_regularization)
  decoder = decoder_factory.build_decoder(backbone.output_specs, model_config,
                                          l2_regularization)
  head = build_yolo_head(decoder.output_specs, model_config, l2_regularization)
  generator = build_yolo_detection_generator(model_config, anchor_dict)

  model = yolo_model.Yolo(
      backbone=backbone,
      decoder=decoder,
      head=head,
      detection_generator=generator)
  model.build(input_specs.shape)
  model.summary(print_fn=logging.info)

  return model, generator.get_losses()
......@@ -36,7 +36,7 @@ class YoloLayer(tf.keras.Model):
loss_type='ciou',
iou_normalizer=1.0,
cls_normalizer=1.0,
obj_normalizer=1.0,
object_normalizer=1.0,
use_scaled_loss=False,
update_on_repeat=False,
pre_nms_points=5000,
......@@ -67,7 +67,8 @@ class YoloLayer(tf.keras.Model):
iou_normalizer: `float` for how much to scale the loss on the IOU or the
boxes.
cls_normalizer: `float` for how much to scale the loss on the classes.
obj_normalizer: `float` for how much to scale loss on the detection map.
object_normalizer: `float` for how much to scale loss on the detection
map.
use_scaled_loss: `bool` for whether to use the scaled loss
or the traditional loss.
update_on_repeat: `bool` indicating how you would like to handle repeated
......@@ -110,7 +111,7 @@ class YoloLayer(tf.keras.Model):
self._truth_thresh = truth_thresh
self._iou_normalizer = iou_normalizer
self._cls_normalizer = cls_normalizer
self._obj_normalizer = obj_normalizer
self._object_normalizer = object_normalizer
self._objectness_smooth = objectness_smooth
self._nms_thresh = nms_thresh
self._max_boxes = max_boxes
......@@ -289,7 +290,7 @@ class YoloLayer(tf.keras.Model):
loss_types=self._loss_type,
iou_normalizers=self._iou_normalizer,
cls_normalizers=self._cls_normalizer,
obj_normalizers=self._obj_normalizer,
object_normalizers=self._object_normalizer,
objectness_smooths=self._objectness_smooth,
box_types=self._box_type,
max_deltas=self._max_delta,
......
......@@ -14,7 +14,9 @@
"""Contains common building blocks for yolo neural networks."""
from typing import Callable, List, Tuple
import tensorflow as tf
from official.modeling import tf_utils
from official.vision.beta.ops import spatial_transform_ops
......@@ -141,6 +143,7 @@ class ConvBN(tf.keras.layers.Layer):
# activation params
self._activation = activation
self._leaky_alpha = leaky_alpha
self._fuse = False
super().__init__(**kwargs)
......@@ -164,6 +167,8 @@ class ConvBN(tf.keras.layers.Layer):
momentum=self._norm_momentum,
epsilon=self._norm_epsilon,
axis=self._bn_axis)
else:
self.bn = None
if self._activation == 'leaky':
self._activation_fn = tf.keras.layers.LeakyReLU(alpha=self._leaky_alpha)
......@@ -174,11 +179,44 @@ class ConvBN(tf.keras.layers.Layer):
def call(self, x):
x = self.conv(x)
if self._use_bn:
if self._use_bn and not self._fuse:
x = self.bn(x)
x = self._activation_fn(x)
return x
def fuse(self):
  """Folds the batch norm parameters into the convolution weights in place.

  After fusion the batch norm is dropped and the layer is frozen, so this is
  an inference-only optimization. Fusion is skipped for separable convolutions
  and when no batch norm layer exists; calling it twice is a no-op.
  """
  if self.bn is not None and not self._use_separable_conv:
    # Fuse convolution and batchnorm, gives me +2 to 3 FPS 2ms latency.
    # layers: https://tehnokv.com/posts/fusing-batchnorm-and-conv/
    if self._fuse:
      return

    self._fuse = True
    conv_weights = self.conv.get_weights()[0]
    gamma, beta, moving_mean, moving_variance = self.bn.get_weights()

    # The conv previously had no bias (batch norm supplied the shift); enable
    # one and rebuild so set_weights below accepts [kernel, bias].
    self.conv.use_bias = True
    infilters = conv_weights.shape[-2]
    self.conv.build([None, None, None, infilters])

    # Per-channel scale: w_fused = diag(gamma / sqrt(var + eps)) @ w_conv.
    # The kernel is transposed/flattened to (out_channels, in*k*k) so the
    # scaling can be applied as a single matmul, then restored.
    base = tf.sqrt(self._norm_epsilon + moving_variance)
    w_conv_base = tf.transpose(conv_weights, perm=(3, 2, 0, 1))
    w_conv = tf.reshape(w_conv_base, [conv_weights.shape[-1], -1])

    w_bn = tf.linalg.diag(gamma / base)
    w_conv = tf.reshape(tf.matmul(w_bn, w_conv), w_conv_base.get_shape())
    w_conv = tf.transpose(w_conv, perm=(2, 3, 1, 0))

    # Fused bias: b = beta - gamma * moving_mean / sqrt(var + eps).
    b_bn = beta - gamma * moving_mean / base
    self.conv.set_weights([w_conv, b_bn])

    # Drop the batch norm and freeze the fused weights.
    del self.bn
    self.trainable = False
    self.conv.trainable = False
    self.bn = None
  return
def get_config(self):
# used to store/share parameters to reconstruct the model
layer_config = {
......
......@@ -14,72 +14,19 @@
"""Yolo models."""
from typing import Mapping, Union
import tensorflow as tf
# static base Yolo Models that do not require configuration
# similar to a backbone model id.
# this is done to greatly simplify the model config
# the structure is as follows. model version, {v3, v4, v#, ... etc}
# the model config type {regular, tiny, small, large, ... etc}
# Default decoder structure parameters for each stock Yolo release, keyed by
# model version ("v3", "v4") and then by model type ("regular", "tiny", ...).
YOLO_MODELS = {
    "v4":
        dict(
            # Standard Yolo-v4 decoder.
            regular=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                path_process_len=6),
            # Reduced decoder used by Yolo-v4 tiny.
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            # CSP decoder variant.
            csp=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=5,
                fpn_depth=5,
                path_process_len=6),
            # Larger CSP decoder variant with wider FPN filters.
            csp_large=dict(
                embed_spp=False,
                use_fpn=True,
                max_level_process_len=None,
                csp_stack=7,
                fpn_depth=7,
                path_process_len=8,
                fpn_filter_scale=2),
        ),
    "v3":
        dict(
            # Standard Yolo-v3 decoder.
            regular=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=None,
                path_process_len=6),
            # Reduced decoder used by Yolo-v3 tiny.
            tiny=dict(
                embed_spp=False,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
            # Yolo-v3 with spatial pyramid pooling embedded.
            spp=dict(
                embed_spp=True,
                use_fpn=False,
                max_level_process_len=2,
                path_process_len=1),
        ),
}
from official.vision.beta.projects.yolo.modeling.layers import nn_blocks
class Yolo(tf.keras.Model):
"""The YOLO model class."""
def __init__(self,
backbone=None,
decoder=None,
head=None,
detection_generator=None,
backbone,
decoder,
head,
detection_generator,
**kwargs):
"""Detection initialization function.
......@@ -93,10 +40,10 @@ class Yolo(tf.keras.Model):
super(Yolo, self).__init__(**kwargs)
self._config_dict = {
"backbone": backbone,
"decoder": decoder,
"head": head,
"filter": detection_generator
'backbone': backbone,
'decoder': decoder,
'head': head,
'detection_generator': detection_generator
}
# model components
......@@ -104,18 +51,19 @@ class Yolo(tf.keras.Model):
self._decoder = decoder
self._head = head
self._detection_generator = detection_generator
self._fused = False
return
def call(self, inputs, training=False):
maps = self._backbone(inputs)
decoded_maps = self._decoder(maps)
raw_predictions = self._head(decoded_maps)
maps = self.backbone(inputs)
decoded_maps = self.decoder(maps)
raw_predictions = self.head(decoded_maps)
if training:
return {"raw_output": raw_predictions}
return {'raw_output': raw_predictions}
else:
# Post-processing.
predictions = self._detection_generator(raw_predictions)
predictions.update({"raw_output": raw_predictions})
predictions = self.detection_generator(raw_predictions)
predictions.update({'raw_output': raw_predictions})
return predictions
@property
......@@ -141,28 +89,22 @@ class Yolo(tf.keras.Model):
def from_config(cls, config):
return cls(**config)
def get_weight_groups(self, train_vars):
  """Sort the list of trainable variables into groups for optimization.

  Args:
    train_vars: a list of tf.Variables that need to get sorted into their
      respective groups.

  Returns:
    weights: a list of tf.Variables for the weights.
    bias: a list of tf.Variables for the bias.
    other: a list of tf.Variables for the other operations.
  """
  weights, bias, other = [], [], []
  for variable in train_vars:
    var_name = variable.name
    # Bias-like terms (biases and batch-norm betas) are matched first, so a
    # name containing both 'beta' and 'kernel' lands in the bias group —
    # this preserves the original check order.
    if 'bias' in var_name or 'beta' in var_name:
      bias.append(variable)
    elif 'kernel' in var_name or 'weight' in var_name:
      weights.append(variable)
    else:
      other.append(variable)
  return weights, bias, other
@property
def checkpoint_items(
    self) -> Mapping[str, Union[tf.keras.Model, tf.keras.layers.Layer]]:
  """Returns a dictionary of items to be additionally checkpointed."""
  items = {'backbone': self.backbone, 'head': self.head}
  # The decoder is optional; only checkpoint it when present.
  if self.decoder is not None:
    items['decoder'] = self.decoder
  return items
def fuse(self):
  """Fuses all Convolution and Batchnorm layers to get better latency."""
  print('Fusing Conv Batch Norm Layers.')
  if not self._fused:
    # Guarded so repeated calls do not re-fuse already-fused layers.
    self._fused = True
    for layer in self.submodules:
      if isinstance(layer, nn_blocks.ConvBN):
        layer.fuse()
    self.summary()
  return
......@@ -14,7 +14,6 @@
"""Mosaic op."""
import random
import tensorflow as tf
import tensorflow_addons as tfa
......@@ -55,7 +54,7 @@ class Mosaic:
the images should be preserved.
jitter: `float` for the maximum change in aspect ratio expected in each
preprocessing step.
mosaic_crop_mode: `str` they type of mosaic to apply. The options are
mosaic_crop_mode: `str` the type of mosaic to apply. The options are
{crop, scale, None}, crop will construct a mosaic by slicing images
togther, scale will create a mosaic by concatnating and shifting the
image, and None will default to scale and apply no post processing to
......@@ -325,6 +324,12 @@ class Mosaic:
else:
return self._add_param(noop)
def _beta(self, alpha, beta):
  """Generates a random number using the beta distribution.

  Args:
    alpha: `float` first shape parameter of the Beta distribution.
    beta: `float` second shape parameter of the Beta distribution.

  Returns:
    A scalar tensor sampled from Beta(alpha, beta).
  """
  a = tf.random.gamma([], alpha)
  b = tf.random.gamma([], beta)
  # Beta(alpha, beta) = Ga / (Ga + Gb) with Ga ~ Gamma(alpha, 1) and
  # Gb ~ Gamma(beta, 1). The previous `b / (a + b)` sampled Beta(beta, alpha),
  # which only coincides when alpha == beta (as at the symmetric mixup call
  # site); fixed so asymmetric parameters behave as the signature implies.
  return a / (a + b)
def _mixup(self, one, two):
"""Blend together 2 images for the mixup data augmentation."""
if self._mixup_frequency >= 1.0:
......@@ -337,8 +342,8 @@ class Mosaic:
if domo >= (1 - self._mixup_frequency):
sample = one
otype = one['image'].dtype
r = preprocessing_ops.random_uniform_strong(
0.4, 0.6, tf.float32, seed=self._seed)
r = self._beta(8.0, 8.0)
sample['image'] = (
r * tf.cast(one['image'], tf.float32) +
(1 - r) * tf.cast(two['image'], tf.float32))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment