Commit 9cd84cc1 authored by A. Unique TensorFlower

Merge pull request #10333 from PurdueDualityLab:exp_pr2

PiperOrigin-RevId: 409474266
parents 987238e6 7a45b513
...
@@ -33,6 +33,8 @@ class YoloDecoder(hyperparams.Config):
   use_separable_conv: bool = False
   csp_stack: Optional[bool] = None
   fpn_depth: Optional[int] = None
+  max_fpn_depth: Optional[int] = None
+  max_csp_stack: Optional[int] = None
   fpn_filter_scale: Optional[int] = None
   path_process_len: Optional[int] = None
   max_level_process_len: Optional[int] = None
...
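Note: both new fields default to None. As the YoloFPN changes later in this diff show, the decoder then falls back to fpn_depth and to min(max_fpn_depth, csp_stack). A minimal sketch of that fallback, with illustrative values:

# Minimal sketch of the None-fallback applied later in this diff
# (self._max_fpn_depth = max_fpn_depth or self._fpn_depth, etc.).
# Values are illustrative, not taken from any shipped config.
fpn_depth = 7
csp_stack = 7
max_fpn_depth = None  # new config field
max_csp_stack = None  # new config field

effective_fpn_depth = max_fpn_depth or fpn_depth  # -> 7
effective_csp_stack = max_csp_stack or min(effective_fpn_depth, csp_stack)  # -> 7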
# --experiment_type=scaled_yolo
# mAP 47.6%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [640, 640, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'altered_cspdarknet53'
max_level: 5
min_level: 3
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.65
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
object_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 3
boxes: [box: [12, 16], box: [19, 36], box: [40, 28],
box: [36, 75], box: [76, 55], box: [72, 146],
box: [142, 110], box: [192, 243], box: [459, 401]]
train_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.0
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: true
random_flip: true
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
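Note: the YAML files in this commit are overrides for the scaled_yolo experiment registered later in this diff. A hedged sketch of applying one on top of the registered defaults; the override helper and the local file name are assumptions, not part of this commit:

# Hedged sketch: build the registered 'scaled_yolo' config and override it
# with this YAML. get_exp_config matches the factory registration shown
# later in this diff; override_params_dict and the file path are assumed.
from official.core import exp_factory
from official.modeling.hyperparams import params_dict

config = exp_factory.get_exp_config('scaled_yolo')
params_dict.override_params_dict(config, 'scaled_yolo_640.yaml', is_strict=True)
print(config.task.model.input_size)  # [640, 640, 3]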
# --experiment_type=scaled_yolo
# mAP 51.1%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [896, 896, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'csp-large'
max_level: 5
min_level: 3
width_scale: 1.00
depth_scale: 1.00
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp_large
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.65
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.3
object_normalizer:
'5': 0.28
'4': 0.70
'3': 2.80
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 4
boxes: [box: [13, 17], box: [31, 25], box: [24, 51], box: [61, 45],
box: [48, 102], box: [119, 96], box: [97, 189], box: [217, 184],
box: [171, 384], box: [324, 451], box: [616, 618], box: [800, 800]]
train_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.2
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: true
random_flip: true
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
trainer:
best_checkpoint_eval_metric: 'AP'
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_metric_comp: 'higher'
train_steps: 231000 # 500 epochs
optimizer_config:
learning_rate:
cosine:
decay_steps: 231000 # 500 epochs
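Note: the 231000-step figure in the trainer block above is consistent with the batch size of 256 that this PR sets in the experiment factory, assuming the usual 118,287-image COCO train split:

# Worked check of "train_steps: 231000  # 500 epochs", assuming the
# 118,287-image COCO train split used elsewhere in the Model Garden.
COCO_TRAIN_EXAMPLES = 118287
train_batch_size = 256  # set by this PR in the experiment factory
steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size  # 462
assert steps_per_epoch * 500 == 231000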
# --experiment_type=scaled_yolo
# mAP 54%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [1280, 1280, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'csp-large'
max_level: 6
min_level: 3
width_scale: 1.00
depth_scale: 1.00
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp_large
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.65
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.5
object_normalizer:
'6': 0.07
'5': 0.29
'4': 0.7
'3': 2.8
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 4
boxes: [box: [13, 17], box: [31, 25], box: [24, 51], box: [61, 45],
box: [61, 45], box: [48, 102], box: [119, 96], box: [97, 189],
box: [97, 189], box: [217, 184], box: [171, 384], box: [324, 451],
box: [324, 451], box: [545, 357], box: [616, 618], box: [1024, 1024]]
train_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.2
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: true
random_flip: true
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
trainer:
best_checkpoint_eval_metric: 'AP'
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_metric_comp: 'higher'
train_steps: 231000 # 500 epochs
optimizer_config:
learning_rate:
cosine:
decay_steps: 231000 # 500 epochs
# --experiment_type=scaled_yolo
# mAP 54.7%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
tpu_enable_xla_dynamic_padder: false
task:
model:
input_size: [1536, 1536, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'csp-large'
max_level: 7
min_level: 3
width_scale: 1.25
depth_scale: 1.00
decoder:
type: yolo_decoder
yolo_decoder:
version: v4
type: csp_large
head:
smart_bias: true
detection_generator:
box_type:
'all': scaled
scale_xy:
'all': 2.0
max_boxes: 300
nms_type: iou
iou_thresh: 0.001
nms_thresh: 0.65
loss:
use_scaled_loss: true
update_on_repeat: true
box_loss_type:
'all': ciou
ignore_thresh:
'all': 0.0
iou_normalizer:
'all': 0.05
cls_normalizer:
'all': 0.5
object_normalizer:
'7': 0.07
'6': 0.22
'5': 0.35
'4': 0.7
'3': 2.8
objectness_smooth:
'all': 1.0
norm_activation:
use_sync_bn: true
num_classes: 80
anchor_boxes:
anchors_per_scale: 4
boxes: [box: [13, 17], box: [22, 25], box: [55, 41], box: [27, 66],
box: [57, 88], box: [112, 69], box: [69, 177], box: [136, 138],
box: [136, 138], box: [287, 114], box: [134, 275], box: [268, 248],
box: [268, 248], box: [232, 504], box: [445, 416], box: [640, 640],
box: [812, 393], box: [477, 808], box: [1070, 908], box: [1408, 1408]]
train_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/train*'
shuffle_buffer_size: 10000
parser:
mosaic:
mosaic_frequency: 1.0
mixup_frequency: 0.2
mosaic_crop_mode: 'scale'
mosaic_center: 0.25
aug_scale_min: 0.1
aug_scale_max: 1.9
max_num_instances: 300
letter_box: true
random_flip: true
aug_rand_translate: 0.1
area_thresh: 0.1
validation_data:
input_path: '/readahead/200M/placer/prod/home/tensorflow-performance-data/datasets/coco/val*'
trainer:
best_checkpoint_eval_metric: 'AP'
best_checkpoint_export_subdir: 'best_ckpt'
best_checkpoint_metric_comp: 'higher'
train_steps: 231000 # 500 epochs
optimizer_config:
learning_rate:
cosine:
decay_steps: 231000 # 500 epochs
runtime:
distribution_strategy: 'mirrored'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 1001
input_size: [256, 256, 3]
backbone:
type: 'darknet'
darknet:
model_id: 'cspdarknet53'
norm_activation:
activation: 'mish'
losses:
l2_weight_decay: 0.0005
one_hot: true
label_smoothing: 0.1
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 128
dtype: 'float16'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: true
global_batch_size: 128
dtype: 'float16'
drop_remainder: false
trainer:
train_steps: 1200000 # epochs: 120
validation_steps: 400 # size of validation data
validation_interval: 10000
steps_per_loop: 10000
summary_interval: 10000
checkpoint_interval: 10000
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'polynomial'
polynomial:
initial_learning_rate: 0.1
end_learning_rate: 0.0001
power: 4.0
decay_steps: 1200000
warmup:
type: 'linear'
linear:
warmup_steps: 1000 # learning rate rises from 0 to 0.1 over 1000 steps
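Note: the classification schedule above is plain polynomial decay with a linear warmup. A hedged sketch using stock Keras pieces; the Model Garden actually wires this up through its own optimizer factory, and the hand-rolled warmup below is only for illustration:

# Hedged sketch of the schedule above: polynomial decay 0.1 -> 0.0001
# (power 4.0) over 1.2M steps, with a 1000-step linear warmup.
import tensorflow as tf

decay = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=0.1,
    end_learning_rate=0.0001,
    decay_steps=1200000,
    power=4.0)

def learning_rate(step):
  step = tf.cast(step, tf.float32)
  warmup = 0.1 * step / 1000.0  # rises linearly from 0 to 0.1
  return tf.where(step < 1000.0, warmup, decay(step))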
...
@@ -268,7 +268,7 @@ def yolo() -> cfg.ExperimentConfig:
@exp_factory.register_config_factory('yolo_darknet')
def yolo_darknet() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv3 and v4."""
-  train_batch_size = 64
+  train_batch_size = 256
  eval_batch_size = 8
  train_epochs = 300
  steps_per_epoch = COCO_TRAIN_EXAMPLES // train_batch_size
...
@@ -389,7 +389,7 @@ def yolo_darknet() -> cfg.ExperimentConfig:
@exp_factory.register_config_factory('scaled_yolo')
def scaled_yolo() -> cfg.ExperimentConfig:
  """COCO object detection with YOLOv4-csp and v4."""
-  train_batch_size = 64
+  train_batch_size = 256
  eval_batch_size = 8
  train_epochs = 300
  warmup_epochs = 3
...
@@ -411,7 +411,7 @@ def scaled_yolo() -> cfg.ExperimentConfig:
          norm_activation=common.NormActivation(
              activation='mish',
              use_sync_bn=True,
-              norm_epsilon=0.0001,
+              norm_epsilon=0.001,
              norm_momentum=0.97),
          head=YoloHead(smart_bias=True),
          loss=YoloLoss(use_scaled_loss=True),
...
@@ -469,7 +469,7 @@ def scaled_yolo() -> cfg.ExperimentConfig:
          validation_interval=validation_interval * steps_per_epoch,
          steps_per_loop=steps_per_epoch,
          summary_interval=steps_per_epoch,
-          checkpoint_interval=steps_per_epoch,
+          checkpoint_interval=5 * steps_per_epoch,
          optimizer_config=optimization.OptimizationConfig({
              'ema': {
                  'average_decay': 0.9999,
...
@@ -483,7 +483,7 @@ def scaled_yolo() -> cfg.ExperimentConfig:
                  'momentum_start': 0.8,
                  'nesterov': True,
                  'warmup_steps': steps_per_epoch * warmup_epochs,
-                  'weight_decay': 0.0005 * train_batch_size / 64.0,
+                  'weight_decay': 0.0005,
              }
          },
          'learning_rate': {
...
...
@@ -237,14 +237,14 @@ class Parser(parser.Parser):
          affine=affine,
          shuffle_boxes=False,
          area_thresh=self._area_thresh,
-          augment=True,
+          filter_and_clip_boxes=True,
          seed=self._seed)
      classes = tf.gather(classes, inds)
      info = infos[-1]
    else:
      image = tf.image.resize(
          image, (self._image_h, self._image_w), method='nearest')
-      output_size = tf.cast([640, 640], tf.float32)
+      output_size = tf.cast([self._image_h, self._image_w], tf.float32)
      boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
      inds = bbox_ops.get_non_empty_box_indices(boxes_)
      boxes = tf.gather(boxes, inds)
...
@@ -286,7 +286,8 @@ class Parser(parser.Parser):
      # Clip and clean boxes.
      image = image / 255.0
      boxes, inds = preprocessing_ops.transform_and_clip_boxes(
-          boxes, infos, shuffle_boxes=False, area_thresh=0.0, augment=True)
+          boxes, infos, shuffle_boxes=False, area_thresh=0.0,
+          filter_and_clip_boxes=False)
      classes = tf.gather(classes, inds)
      info = infos[-1]
...
@@ -342,17 +343,26 @@ class Parser(parser.Parser):
    # Update the labels dictionary.
    if not is_training:
      # Sets up groundtruth data for evaluation.
      groundtruths = {
-          'source_id': labels['source_id'],
-          'height': height,
-          'width': width,
-          'num_detections': tf.shape(gt_boxes)[0],
-          'image_info': info,
-          'boxes': gt_boxes,
-          'classes': gt_classes,
-          'areas': tf.gather(data['groundtruth_area'], inds),
+          'source_id':
+              labels['source_id'],
+          'height':
+              data['height'],
+          'width':
+              data['width'],
+          'num_detections':
+              tf.shape(data['groundtruth_boxes'])[0],
+          'image_info':
+              info,
+          'boxes':
+              bbox_ops.denormalize_boxes(
+                  data['groundtruth_boxes'],
+                  tf.cast([data['height'], data['width']], gt_boxes.dtype)),
+          'classes':
+              data['groundtruth_classes'],
+          'areas':
+              data['groundtruth_area'],
          'is_crowds':
              tf.cast(tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
      }
...
...
@@ -225,7 +225,7 @@ LARGECSP53 = {
            False
        ],
        [
-            'DarkRes', 'csp', 1, True, 64, None, None, None, None, 'mish', -1,
+            'DarkRes', 'csp', 1, False, 64, None, None, None, None, 'mish', -1,
            1, 1, False
        ],
        [
...
...
@@ -50,8 +50,18 @@ YOLO_MODELS = {
            max_level_process_len=None,
            csp_stack=7,
            fpn_depth=7,
+            max_fpn_depth=5,
+            max_csp_stack=5,
            path_process_len=8,
-            fpn_filter_scale=2),
+            fpn_filter_scale=1),
+        csp_xlarge=dict(
+            embed_spp=False,
+            use_fpn=True,
+            max_level_process_len=None,
+            csp_stack=7,
+            fpn_depth=7,
+            path_process_len=8,
+            fpn_filter_scale=1),
    ),
    'v3':
        dict(
...
@@ -87,6 +97,8 @@ class YoloFPN(tf.keras.layers.Layer):
  def __init__(self,
               fpn_depth=4,
+               max_fpn_depth=None,
+               max_csp_stack=None,
               use_spatial_attention=False,
               csp_stack=False,
               activation='leaky',
...
@@ -104,6 +116,10 @@ class YoloFPN(tf.keras.layers.Layer):
    Args:
      fpn_depth: `int`, number of layers to use in each FPN path
        if you choose to use an FPN.
+      max_fpn_depth: `int`, number of layers to use in each FPN path
+        if you choose to use an FPN along the largest FPN level.
+      max_csp_stack: `int`, number of layers to use for CSP on the largest_path
+        only.
      use_spatial_attention: `bool`, use the spatial attention module.
      csp_stack: `bool`, CSPize the FPN.
      activation: `str`, the activation function to use, typically leaky or mish.
...
@@ -121,6 +137,7 @@ class YoloFPN(tf.keras.layers.Layer):
    super().__init__(**kwargs)
    self._fpn_depth = fpn_depth
+    self._max_fpn_depth = max_fpn_depth or self._fpn_depth
    self._activation = activation
    self._use_sync_bn = use_sync_bn
...
@@ -133,6 +150,7 @@ class YoloFPN(tf.keras.layers.Layer):
    self._use_spatial_attention = use_spatial_attention
    self._filter_scale = fpn_filter_scale
    self._csp_stack = csp_stack
+    self._max_csp_stack = max_csp_stack or min(self._max_fpn_depth, csp_stack)

    self._base_config = dict(
        activation=self._activation,
...
@@ -184,6 +202,7 @@ class YoloFPN(tf.keras.layers.Layer):
    for level, depth in zip(
        reversed(range(self._min_level, self._max_level + 1)), self._depths):
+
      if level == self._min_level:
        self.resamples[str(level)] = nn_blocks.PathAggregationBlock(
            filters=depth // 2,
...
@@ -211,10 +230,10 @@ class YoloFPN(tf.keras.layers.Layer):
      else:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=depth,
-            repetitions=self._fpn_depth + 1 * int(self._csp_stack == 0),
+            repetitions=self._max_fpn_depth + 1 * int(self._csp_stack == 0),
            insert_spp=True,
            block_invert=False,
-            csp_stack=self._csp_stack,
+            csp_stack=min(self._csp_stack, self._max_fpn_depth),
            **self._base_config)

  def call(self, inputs):
...
@@ -349,13 +368,16 @@ class YoloPAN(tf.keras.layers.Layer):
      downsample = False
      upsample = True

-    if self._csp_stack == 0:
-      proc_filters = lambda x: x
-      resample_filters = lambda x: x // 2
-    else:
-      proc_filters = lambda x: x * 2
-      resample_filters = lambda x: x
    for level, depth in zip(self._iterator, self._depths):
+      if level > 5:
+        proc_filters = lambda x: x * 2
+        resample_filters = lambda x: x
+      elif self._csp_stack == 0:
+        proc_filters = lambda x: x
+        resample_filters = lambda x: x // 2
+      else:
+        proc_filters = lambda x: x * 2
+        resample_filters = lambda x: x
      if level == self._input:
        self.preprocessors[str(level)] = nn_blocks.DarkRouteProcess(
            filters=proc_filters(depth),
...
@@ -396,7 +418,7 @@ class YoloPAN(tf.keras.layers.Layer):
    depths = []
    if len(inputs.keys()) > 3 or self._fpn_filter_scale > 1:
      for i in range(self._min_level, self._max_level + 1):
-        depths.append(inputs[str(i)][-1] * 2)
+        depths.append(inputs[str(i)][-1])
    else:
      for _ in range(self._min_level, self._max_level + 1):
        depths.append(minimum_depth)
...
@@ -429,6 +451,8 @@ class YoloDecoder(tf.keras.Model):
               use_spatial_attention=False,
               csp_stack=False,
               fpn_depth=4,
+               max_fpn_depth=None,
+               max_csp_stack=None,
               fpn_filter_scale=1,
               path_process_len=6,
               max_level_process_len=None,
...
@@ -455,6 +479,8 @@ class YoloDecoder(tf.keras.Model):
      csp_stack: `bool`, CSPize the FPN.
      fpn_depth: `int`, number of layers to use in each FPN path if you choose
        to use an FPN.
+      max_fpn_depth: `int`, maximum fpn depth.
+      max_csp_stack: `int`, maximum csp stack.
      fpn_filter_scale: `int`, scaling factor for the FPN filters.
      path_process_len: `int`, number of layers to use in each Decoder path.
      max_level_process_len: `int`, number of layers to use in the largest
...
@@ -475,6 +501,8 @@ class YoloDecoder(tf.keras.Model):
    self._input_specs = input_specs
    self._use_fpn = use_fpn
    self._fpn_depth = fpn_depth
+    self._max_fpn_depth = max_fpn_depth
+    self._max_csp_stack = max_csp_stack
    self._path_process_len = path_process_len
    self._max_level_process_len = max_level_process_len
    self._embed_spp = embed_spp
...
@@ -514,8 +542,10 @@ class YoloDecoder(tf.keras.Model):
    }
    if self._use_fpn:
      inter_outs = YoloFPN(
-          fpn_depth=self._fpn_depth, **self._base_config)(
-              inputs)
+          fpn_depth=self._fpn_depth,
+          max_fpn_depth=self._max_fpn_depth,
+          max_csp_stack=self._max_csp_stack,
+          **self._base_config)(inputs)
      outputs = YoloPAN(**self._decoder_config)(inter_outs)
    else:
      inter_outs = None
...
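Note: taken together with the YOLO_MODELS entry above, the decoder now threads the two caps through to YoloFPN. A hedged construction sketch; the input specs and exact argument set are illustrative, not copied from a test:

# Hedged sketch: constructing the decoder with the new caps, mirroring the
# 'csp_large' entry above. Input specs are illustrative placeholders.
decoder = YoloDecoder(
    input_specs={'3': [None, 80, 80, 256],
                 '4': [None, 40, 40, 512],
                 '5': [None, 20, 20, 1024]},
    use_fpn=True,
    csp_stack=7,
    fpn_depth=7,
    max_fpn_depth=5,
    max_csp_stack=5,
    path_process_len=8,
    fpn_filter_scale=1,
    activation='mish')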
...
@@ -179,7 +179,7 @@ class Mosaic:
          infos,
          area_thresh=self._area_thresh,
          shuffle_boxes=False,
-          augment=True,
+          filter_and_clip_boxes=True,
          seed=self._seed)
      classes, is_crowd, area = self._select_ind(inds, classes, is_crowd, area)  # pylint:disable=unbalanced-tuple-unpacking
      return image, boxes, classes, is_crowd, area, crop_points
...
...
@@ -482,11 +482,15 @@ def resize_and_jitter_image(image,
      image_ = tf.pad(
          cropped_image, [[pad[0], pad[2]], [pad[1], pad[3]], [0, 0]],
          constant_values=PAD_VALUE)

+      # Pad and scale info
+      isize = tf.cast(tf.shape(image_)[:2], dtype=tf.float32)
+      osize = tf.cast((desired_size[0], desired_size[1]), dtype=tf.float32)
      pad_info = tf.stack([
          tf.cast(tf.shape(cropped_image)[:2], tf.float32),
-          tf.cast(tf.shape(image_)[:2], dtype=tf.float32),
-          tf.ones_like(original_dims, dtype=tf.float32),
-          (-tf.cast(pad[:2], tf.float32))
+          osize,
+          osize/isize,
+          (-tf.cast(pad[:2], tf.float32)*osize/isize)
      ])
      infos.append(pad_info)
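Note: the corrected record now carries the true resize scale and a scale-adjusted pad offset rather than ones and the raw pad. A worked example with assumed sizes (a 416x416 crop padded by 48px per side to 512x512, then resized to a 640x640 desired size):

# Worked example of the corrected pad_info record, with assumed sizes.
# Rows: [pre-pad hw, output hw, scale, offset].
import tensorflow as tf

crop_hw = tf.constant([416., 416.])  # cropped_image shape
isize = tf.constant([512., 512.])    # padded shape, tf.shape(image_)[:2]
osize = tf.constant([640., 640.])    # desired_size
pad = tf.constant([48., 48.])        # top/left padding

pad_info = tf.stack([
    crop_hw,
    osize,
    osize / isize,           # [1.25, 1.25], the resize scale now recorded
    -pad * (osize / isize),  # [-60., -60.], pad offset in output pixels
])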
...
@@ -761,7 +765,9 @@ def boxes_candidates(clipped_boxes,
  Returns:
    indices[:, 0]: A `Tensor` representing valid boxes after filtering.
  """
+  if area_thr == 0.0:
+    wh_thr = 0
+    ar_thr = np.inf
  area_thr = tf.math.abs(area_thr)

  # Get the scaled and shifted heights of the original
...
@@ -778,8 +784,8 @@ def boxes_candidates(clipped_boxes,
      clipped_height / (clipped_width + 1e-16))

  # Ensure the clipped width and height are larger than a preset threshold.
-  conda = clipped_width > wh_thr
-  condb = clipped_height > wh_thr
+  conda = clipped_width >= wh_thr
+  condb = clipped_height >= wh_thr

  # Ensure the area of the clipped box is larger than the area threshold.
  area = (clipped_height * clipped_width) / (og_width * og_height + 1e-16)
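Note: in plain terms, a clipped box survives the filter if its width and height meet the threshold (now inclusive, >=), its aspect ratio stays under the cap, and it keeps enough of its pre-clip area. A scalar sketch; the wh_thr/ar_thr defaults here are assumptions for illustration:

# Scalar sketch of the boxes_candidates filter logic; default thresholds
# are assumed, not copied from the library.
import numpy as np

def keep_box(og_wh, clipped_wh, wh_thr=2.0, ar_thr=20.0, area_thr=0.1):
  if area_thr == 0.0:  # mirrors the new early-out in this hunk
    wh_thr, ar_thr = 0.0, np.inf
  area_thr = abs(area_thr)
  w, h = clipped_wh
  aspect = max(w / (h + 1e-16), h / (w + 1e-16))
  area = (w * h) / (og_wh[0] * og_wh[1] + 1e-16)
  return w >= wh_thr and h >= wh_thr and aspect < ar_thr and area > area_thr

keep_box((100, 80), (40, 30))   # True: big enough and keeps enough area
keep_box((100, 80), (1.0, 30))  # False: width below the 2-pixel threshold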
...
@@ -837,7 +843,7 @@ def transform_and_clip_boxes(boxes,
                             shuffle_boxes=False,
                             area_thresh=0.1,
                             seed=None,
-                             augment=True):
+                             filter_and_clip_boxes=True):
  """Clips and cleans the boxes.

  Args:
...
@@ -847,7 +853,8 @@ def transform_and_clip_boxes(boxes,
    shuffle_boxes: A `bool` for shuffling the boxes.
    area_thresh: An `int` for the area threshold.
    seed: seed for random number generation.
-    augment: A `bool` for clipping the boxes to [0, 1].
+    filter_and_clip_boxes: A `bool` for filtering and clipping the boxes to
+      [0, 1].

  Returns:
    boxes: A `Tensor` representing the augmented boxes.
...
@@ -868,8 +875,8 @@ def transform_and_clip_boxes(boxes,
  # Make sure all boxes are valid to start, clip to [0, 1] and get only the
  # valid boxes.
-  output_size = tf.cast([640, 640], tf.float32)
-  if augment:
+  output_size = None
+  if filter_and_clip_boxes:
    boxes = tf.math.maximum(tf.math.minimum(boxes, 1.0), 0.0)
    cond = get_valid_boxes(boxes)
...
@@ -918,16 +925,18 @@ def transform_and_clip_boxes(boxes,
    boxes *= tf.cast(tf.expand_dims(cond, axis=-1), boxes.dtype)

  # Threshold the existing boxes.
-  if augment:
-    boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
-    box_history_ = bbox_ops.denormalize_boxes(box_history, output_size)
-    inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh)
+  if filter_and_clip_boxes:
+    if output_size is not None:
+      boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
+      box_history_ = bbox_ops.denormalize_boxes(box_history, output_size)
+      inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh)
+    else:
+      inds = boxes_candidates(
+          boxes, box_history, wh_thr=0.0, area_thr=area_thresh)
    # Select and gather the good boxes.
    if shuffle_boxes:
      inds = tf.random.shuffle(inds, seed=seed)
  else:
-    boxes = box_history
-    boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
-    inds = bbox_ops.get_non_empty_box_indices(boxes_)
+    inds = bbox_ops.get_non_empty_box_indices(boxes)
  boxes = tf.gather(boxes, inds)
  return boxes, inds
...
@@ -302,6 +302,7 @@ class SGDTorch(tf.keras.optimizers.Optimizer):
        "decay": self._initial_decay,
        "momentum": self._serialize_hyperparameter("momentum"),
        "momentum_start": self._serialize_hyperparameter("momentum_start"),
+        "weight_decay": self._serialize_hyperparameter("weight_decay"),
        "warmup_steps": self._serialize_hyperparameter("warmup_steps"),
        "nesterov": self.nesterov,
    })
...
...
@@ -255,16 +255,22 @@ class YoloTask(base_task.Task):
      logs.update({m.name: m.result()})
    return logs

-  def _reorg_boxes(self, boxes, num_detections, image):
+  def _reorg_boxes(self, boxes, info, num_detections):
    """Scale and Clean boxes prior to Evaluation."""
-    # Build a prediciton mask to take only the number of detections
    mask = tf.sequence_mask(num_detections, maxlen=tf.shape(boxes)[1])
    mask = tf.cast(tf.expand_dims(mask, axis=-1), boxes.dtype)

    # Denormalize the boxes by the shape of the image
-    inshape = tf.cast(preprocessing_ops.get_image_shape(image), boxes.dtype)
+    inshape = tf.expand_dims(info[:, 1, :], axis=1)
+    ogshape = tf.expand_dims(info[:, 0, :], axis=1)
+    scale = tf.expand_dims(info[:, 2, :], axis=1)
+    offset = tf.expand_dims(info[:, 3, :], axis=1)

    boxes = box_ops.denormalize_boxes(boxes, inshape)
-    boxes = box_ops.clip_boxes(boxes, inshape)
+    boxes += tf.tile(offset, [1, 1, 2])
+    boxes /= tf.tile(scale, [1, 1, 2])
+    boxes = box_ops.clip_boxes(boxes, ogshape)

    # Mask the boxes for usage
    boxes *= mask
...
@@ -292,10 +298,8 @@ class YoloTask(base_task.Task):
      logs = {self.loss: metric_loss}

      # Reorganize and rescale the boxes
-      boxes = self._reorg_boxes(y_pred['bbox'], y_pred['num_detections'], image)
-      label['groundtruths']['boxes'] = self._reorg_boxes(
-          label['groundtruths']['boxes'], label['groundtruths']['num_detections'],
-          image)
+      info = label['groundtruths']['image_info']
+      boxes = self._reorg_boxes(y_pred['bbox'], info, y_pred['num_detections'])

      # Build the input for the coco evaluation metric
      coco_model_outputs = {
...
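Note: the rewritten _reorg_boxes undoes the letterbox transform recorded in image_info instead of only denormalizing by the network input size. A worked example, assuming the [original hw, input hw, scale, offset] row layout produced by the parser changes above:

# Worked example of the new rescaling path: a 480x640 image letterboxed
# into a 640x640 network input (scale 1.0, 80px top padding -> offset [-80, 0]).
import tensorflow as tf

info = tf.constant([[[480., 640.], [640., 640.], [1., 1.], [-80., 0.]]])
boxes = tf.constant([[[0.25, 0.25, 0.75, 0.75]]])  # normalized [ymin, xmin, ymax, xmax]

inshape = tf.expand_dims(info[:, 1, :], axis=1)  # network input size
ogshape = tf.expand_dims(info[:, 0, :], axis=1)  # original image size
scale = tf.expand_dims(info[:, 2, :], axis=1)
offset = tf.expand_dims(info[:, 3, :], axis=1)

boxes = boxes * tf.tile(inshape, [1, 1, 2])  # denormalize_boxes equivalent
boxes += tf.tile(offset, [1, 1, 2])          # remove letterbox padding
boxes /= tf.tile(scale, [1, 1, 2])           # undo the resize scale
# -> [[[80., 160., 400., 480.]]], i.e. back in original-image pixels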