Commit 9474c108 authored by Vishnu Banna
Browse files

comments addressed

parent bcd5283d
...@@ -22,8 +22,8 @@ from official.vision.beta.configs import backbones ...@@ -22,8 +22,8 @@ from official.vision.beta.configs import backbones
class Darknet(hyperparams.Config): class Darknet(hyperparams.Config):
"""DarkNet config.""" """DarkNet config."""
model_id: str = 'cspdarknet53' model_id: str = 'cspdarknet53'
width_scale: int = 1.0 width_scale: float = 1.0
depth_scale: int = 1.0 depth_scale: float = 1.0
dilate: bool = False dilate: bool = False
min_level: int = 3 min_level: int = 3
max_level: int = 5 max_level: int = 5
......
...@@ -59,10 +59,20 @@ class TfExampleDecoder(tf_example_decoder.TfExampleDecoder): ...@@ -59,10 +59,20 @@ class TfExampleDecoder(tf_example_decoder.TfExampleDecoder):
"""Tensorflow Example proto decoder.""" """Tensorflow Example proto decoder."""
def __init__(self, def __init__(self,
coco91_to_80, coco91_to_80=None,
include_mask=False, include_mask=False,
regenerate_source_id=False, regenerate_source_id=False,
mask_binarize_threshold=None): mask_binarize_threshold=None):
"""Initialize the example decoder.
Args:
coco91_to_80: `bool` indicating whether to convert coco from its 91 class
format to the 80 class format.
include_mask: `bool` indicating if the decoder should also decode instance
masks for instance segmentation.
regenerate_source_id: `bool` indicating if the source id needs to be
recreated for each image sample.
"""
if coco91_to_80 and include_mask: if coco91_to_80 and include_mask:
raise ValueError("If masks are included you cannot \ raise ValueError("If masks are included you cannot \
convert coco from the 91 class format \ convert coco from the 91 class format \
......
""" Detection Data parser and processing for YOLO. # Copyright 2021 The TensorFlow Authors. All Rights Reserved.
Parse image and ground truths in a dataset to training targets and package them #
into (image, labels) tuple for RetinaNet. # Licensed under the Apache License, Version 2.0 (the "License");
""" # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Detection Data parser and processing for YOLO."""
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
from official.vision.beta.projects.yolo.ops import preprocessing_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops
...@@ -19,7 +30,7 @@ class Parser(parser.Parser): ...@@ -19,7 +30,7 @@ class Parser(parser.Parser):
output_size, output_size,
anchors, anchors,
expanded_strides, expanded_strides,
level_limit=None, level_limits=None,
max_num_instances=200, max_num_instances=200,
area_thresh=0.1, area_thresh=0.1,
aug_rand_hue=1.0, aug_rand_hue=1.0,
...@@ -48,11 +59,13 @@ class Parser(parser.Parser): ...@@ -48,11 +59,13 @@ class Parser(parser.Parser):
output_size should be divided by the largest feature stride 2^max_level. output_size should be divided by the largest feature stride 2^max_level.
anchors: `Dict[List[Union[int, float]]]` values for each anchor box. anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
expanded_strides: `Dict[int]` for how much the model scales down the expanded_strides: `Dict[int]` for how much the model scales down the
images at the largest level. images at the largest level. For example, level 3 down samples the image
level_limit: `List` the box sizes that will be allowed at each FPN by a factor of 16, in the expanded strides dictionary, we will pass
along {3: 16} indicating that relative to the original image, the
shapes must be reduced by a factor of 16 to compute the loss.
level_limits: `List` the box sizes that will be allowed at each FPN
level as is done in the FCOS and YOLOX paper for anchor free box level as is done in the FCOS and YOLOX paper for anchor free box
assignment. Anchor free will perform worse than Anchor based, but only assignment.
slightly.
max_num_instances: `int` for the number of boxes to compute loss on. max_num_instances: `int` for the number of boxes to compute loss on.
area_thresh: `float` for the minimum area of a box to allow to pass area_thresh: `float` for the minimum area of a box to allow to pass
through for optimization. through for optimization.
...@@ -108,20 +121,9 @@ class Parser(parser.Parser): ...@@ -108,20 +121,9 @@ class Parser(parser.Parser):
assert output_size[1] % expanded_strides[str(key)] == 0 assert output_size[1] % expanded_strides[str(key)] == 0
assert output_size[0] % expanded_strides[str(key)] == 0 assert output_size[0] % expanded_strides[str(key)] == 0
# scale of each FPN level
self._strides = expanded_strides
# Set the width and height properly and base init: # Set the width and height properly and base init:
self._image_w = output_size[1] self._image_w = output_size[1]
self._image_h = output_size[0] self._image_h = output_size[0]
# Set the anchor boxes for each scale
self._anchors = anchors
self._level_limit = level_limit
# anchor labeling paramters
self._use_tie_breaker = use_tie_breaker
self._best_match_only = best_match_only
self._max_num_instances = max_num_instances self._max_num_instances = max_num_instances
# Image scaling params # Image scaling params
...@@ -143,33 +145,23 @@ class Parser(parser.Parser): ...@@ -143,33 +145,23 @@ class Parser(parser.Parser):
self._aug_rand_hue = aug_rand_hue self._aug_rand_hue = aug_rand_hue
# Set the per level values needed for operation # Set the per level values needed for operation
self._scale_xy = scale_xy
self._anchor_t = anchor_t
self._darknet = darknet self._darknet = darknet
self._area_thresh = area_thresh self._area_thresh = area_thresh
keys = list(self._anchors.keys())
if self._level_limit is not None:
maxim = 2000
self._scale_up = {key: maxim // self._max_num_instances for key in keys}
self._anchor_t = -0.01
elif not self._darknet:
self._scale_up = {key: 6 - i for i, key in enumerate(keys)}
else:
self._scale_up = {key: 1 for key in keys}
self._seed = seed self._seed = seed
# Set the data type based on input string
self._dtype = dtype self._dtype = dtype
self._label_builder = anchor.YoloAnchorLabeler( self._label_builder = anchor.YoloAnchorLabeler(
anchors = self._anchors, anchors = anchors,
match_threshold=self._anchor_t, anchor_free_level_limits = level_limits,
best_matches_only=self._best_match_only, level_strides=expanded_strides,
use_tie_breaker=self._use_tie_breaker center_radius=scale_xy,
) max_num_instances=max_num_instances,
match_threshold=anchor_t,
best_matches_only=best_match_only,
use_tie_breaker=use_tie_breaker,
darknet=darknet,
dtype=dtype)
def _pad_infos_object(self, image): def _pad_infos_object(self, image):
"""Get a Tensor to pad the info object list.""" """Get a Tensor to pad the info object list."""
...@@ -307,57 +299,22 @@ class Parser(parser.Parser): ...@@ -307,57 +299,22 @@ class Parser(parser.Parser):
is_training=False) is_training=False)
return image, labels return image, labels
def set_shape(self, values, pad_axis=0, pad_value=0, inds=None, scale=1): def set_shape(self, values, pad_axis=0, pad_value=0, inds=None):
"""Calls set shape for all input objects.""" """Calls set shape for all input objects."""
if inds is not None: if inds is not None:
values = tf.gather(values, inds) values = tf.gather(values, inds)
vshape = values.get_shape().as_list() vshape = values.get_shape().as_list()
if pad_value is not None: values = preprocessing_ops.pad_max_instances(
values = preprocessing_ops.pad_max_instances(
values, values,
self._max_num_instances, self._max_num_instances,
pad_axis=pad_axis, pad_axis=pad_axis,
pad_value=pad_value) pad_value=pad_value)
vshape[pad_axis] = self._max_num_instances * scale vshape[pad_axis] = self._max_num_instances
values.set_shape(vshape) values.set_shape(vshape)
return values return values
def _build_grid(self, boxes, classes, width, height):
"""Private function for building the full scale object and class grid."""
indexes = {}
updates = {}
true_grids = {}
if self._level_limit is not None:
self._level_limit = [0.0] + self._level_limit + [np.inf]
# for each prediction path generate a properly scaled output prediction map
for i, key in enumerate(self._anchors.keys()):
if self._level_limit is not None:
fpn_limits = self._level_limit[i:i + 2]
else:
fpn_limits = None
scale_xy = self._scale_xy[key] if not self._darknet else 1
indexes[key], updates[key], true_grids[key] = self._label_builder(
key, boxes, classes, self._anchors[key],
width, height, self._strides[str(key)],
scale_xy, self._max_num_instances * self._scale_up[key],
fpn_limits = fpn_limits)
# set/fix the shapes
indexes[key] = self.set_shape(indexes[key], -2, None, None,
self._scale_up[key])
updates[key] = self.set_shape(updates[key], -2, None, None,
self._scale_up[key])
# add all the values to the final dictionary
updates[key] = tf.cast(updates[key], dtype=self._dtype)
return indexes, updates, true_grids
def _build_label(self, def _build_label(self,
image, image,
gt_boxes, gt_boxes,
...@@ -376,16 +333,15 @@ class Parser(parser.Parser): ...@@ -376,16 +333,15 @@ class Parser(parser.Parser):
image.set_shape(imshape) image.set_shape(imshape)
labels = dict() labels = dict()
labels['inds'], labels['upds'], labels['true_conf'] = self._build_grid( (labels['inds'],
gt_boxes, gt_classes, width, height) labels['upds'], labels['true_conf']) = self._label_builder(gt_boxes,
gt_classes,
width,
height)
# Set/fix the boxes shape. # Set/fix the boxes shape.
boxes = self.set_shape(gt_boxes, pad_axis=0, pad_value=0) boxes = self.set_shape(gt_boxes, pad_axis=0, pad_value=0)
classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1) classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1)
area = self.set_shape(
data['groundtruth_area'], pad_axis=0, pad_value=0, inds=inds)
is_crowd = self.set_shape(
data['groundtruth_is_crowd'], pad_axis=0, pad_value=0, inds=inds)
# Build the dictionary set. # Build the dictionary set.
labels.update({ labels.update({
...@@ -396,6 +352,7 @@ class Parser(parser.Parser): ...@@ -396,6 +352,7 @@ class Parser(parser.Parser):
# Update the labels dictionary. # Update the labels dictionary.
if not is_training: if not is_training:
# Sets up groundtruth data for evaluation. # Sets up groundtruth data for evaluation.
groundtruths = { groundtruths = {
'source_id': labels['source_id'], 'source_id': labels['source_id'],
...@@ -405,8 +362,9 @@ class Parser(parser.Parser): ...@@ -405,8 +362,9 @@ class Parser(parser.Parser):
'image_info': info, 'image_info': info,
'boxes': gt_boxes, 'boxes': gt_boxes,
'classes': gt_classes, 'classes': gt_classes,
'areas': area, 'areas': tf.gather(data['groundtruth_area'], inds),
'is_crowds': tf.cast(is_crowd, tf.int32), 'is_crowds': tf.cast(
tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
} }
groundtruths['source_id'] = utils.process_source_id( groundtruths['source_id'] = utils.process_source_id(
groundtruths['source_id']) groundtruths['source_id'])
......
...@@ -14,13 +14,12 @@ ...@@ -14,13 +14,12 @@
"""Yolo Loss function.""" """Yolo Loss function."""
import abc import abc
import collections
import functools import functools
import collections
import tensorflow as tf import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import loss_utils from official.vision.beta.projects.yolo.ops import loss_utils
from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import math_ops from official.vision.beta.projects.yolo.ops import math_ops
...@@ -33,7 +32,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -33,7 +32,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
def __init__(self, def __init__(self,
classes, classes,
mask,
anchors, anchors,
path_stride=1, path_stride=1,
ignore_thresh=0.7, ignore_thresh=0.7,
...@@ -52,8 +50,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -52,8 +50,6 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
Args: Args:
classes: `int` for the number of classes classes: `int` for the number of classes
mask: `List[int]` for the output level that this specific model output
level
anchors: `List[List[int]]` for the anchor boxes that are used in the model anchors: `List[List[int]]` for the anchor boxes that are used in the model
at all levels. For anchor free prediction set the anchor list to be the at all levels. For anchor free prediction set the anchor list to be the
same as the image resolution. same as the image resolution.
...@@ -86,10 +82,9 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -86,10 +82,9 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
""" """
self._loss_type = loss_type self._loss_type = loss_type
self._classes = classes self._classes = classes
self._num = tf.cast(len(mask), dtype=tf.int32) self._num = tf.cast(len(anchors), dtype=tf.int32)
self._truth_thresh = truth_thresh self._truth_thresh = truth_thresh
self._ignore_thresh = ignore_thresh self._ignore_thresh = ignore_thresh
self._masks = mask
self._anchors = anchors self._anchors = anchors
self._iou_normalizer = iou_normalizer self._iou_normalizer = iou_normalizer
...@@ -112,7 +107,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -112,7 +107,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
self._decode_boxes = functools.partial( self._decode_boxes = functools.partial(
loss_utils.get_predicted_box, **box_kwargs) loss_utils.get_predicted_box, **box_kwargs)
self._search_pairs = None self._search_pairs = lambda *args: (None, None, None, None)
self._build_per_path_attributes() self._build_per_path_attributes()
def box_loss(self, true_box, pred_box, darknet=False): def box_loss(self, true_box, pred_box, darknet=False):
...@@ -136,13 +131,18 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -136,13 +131,18 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
scale=None): scale=None):
"""Search of all groundtruths to associate groundtruths to predictions.""" """Search of all groundtruths to associate groundtruths to predictions."""
if self._search_pairs is None: boxes = box_ops.yxyx_to_xcycwh(boxes)
return true_conf, tf.ones_like(true_conf)
if scale is not None:
boxes = boxes * tf.cast(tf.stop_gradient(scale), boxes.dtype)
# Search all predictions against ground truths to find matching boxes for # Search all predictions against ground truths to find matching boxes for
# each pixel. # each pixel.
_, _, iou_max, _ = self._search_pairs( _, _, iou_max, _ = self._search_pairs(pred_boxes, pred_classes,
pred_boxes, pred_classes, boxes, classes, scale=scale, yxyx=True) boxes, classes)
if iou_max is None:
return true_conf, tf.ones_like(true_conf)
# Find the exact indexes to ignore and keep. # Find the exact indexes to ignore and keep.
ignore_mask = tf.cast(iou_max < self._ignore_thresh, pred_boxes.dtype) ignore_mask = tf.cast(iou_max < self._ignore_thresh, pred_boxes.dtype)
...@@ -196,7 +196,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -196,7 +196,7 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
predictions. predictions.
""" """
(loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf, ind_mask, (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf, ind_mask,
grid_mask) = self._compute_loss(true_counts, inds, y_true, boxes, classes, grid_mask) = self._compute_loss(true_counts, inds, y_true, boxes, classes,
y_pred) y_pred)
# Metric compute using done here to save time and resources. # Metric compute using done here to save time and resources.
...@@ -219,7 +219,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta): ...@@ -219,7 +219,8 @@ class YoloLossBase(object, metaclass=abc.ABCMeta):
"""The actual logic to apply to the raw model for optimization.""" """The actual logic to apply to the raw model for optimization."""
... ...
def post_path_aggregation(self, loss, ground_truths, predictions): # pylint:disable=unused-argument def post_path_aggregation(self,
loss, box_loss, conf_loss, class_loss, ground_truths, predictions): # pylint:disable=unused-argument
"""This method allows for post processing of a loss value. """This method allows for post processing of a loss value.
After the loss has been aggregated across all the FPN levels some post After the loss has been aggregated across all the FPN levels some post
...@@ -277,7 +278,6 @@ class DarknetLoss(YoloLossBase): ...@@ -277,7 +278,6 @@ class DarknetLoss(YoloLossBase):
association. association.
""" """
self._anchor_generator = loss_utils.GridGenerator( self._anchor_generator = loss_utils.GridGenerator(
masks=self._masks,
anchors=self._anchors, anchors=self._anchors,
scale_anchors=self._path_stride) scale_anchors=self._path_stride)
...@@ -428,14 +428,13 @@ class ScaledLoss(YoloLossBase): ...@@ -428,14 +428,13 @@ class ScaledLoss(YoloLossBase):
association. association.
""" """
self._anchor_generator = loss_utils.GridGenerator( self._anchor_generator = loss_utils.GridGenerator(
masks=self._masks,
anchors=self._anchors, anchors=self._anchors,
scale_anchors=self._path_stride) scale_anchors=self._path_stride)
if self._ignore_thresh > 0.0: if self._ignore_thresh > 0.0:
self._search_pairs = loss_utils.PairWiseSearch( self._search_pairs = loss_utils.PairWiseSearch(
iou_type=self._loss_type, any_match=False, min_conf=0.25) iou_type=self._loss_type, any_match=False, min_conf=0.25)
self._cls_normalizer = self._cls_normalizer * self._classes/80 self._cls_normalizer = self._cls_normalizer * self._classes/80
return return
...@@ -550,7 +549,8 @@ class ScaledLoss(YoloLossBase): ...@@ -550,7 +549,8 @@ class ScaledLoss(YoloLossBase):
return (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf, return (loss, box_loss, conf_loss, class_loss, mean_loss, iou, pred_conf,
ind_mask, grid_mask) ind_mask, grid_mask)
def post_path_aggregation(self, loss, ground_truths, predictions): def post_path_aggregation(self,
loss, box_loss, conf_loss, class_loss, ground_truths, predictions):
"""This method allows for post processing of a loss value. """This method allows for post processing of a loss value.
By default the model will have about 3 FPN levels {3, 4, 5}, on By default the model will have about 3 FPN levels {3, 4, 5}, on
...@@ -559,19 +559,12 @@ class ScaledLoss(YoloLossBase): ...@@ -559,19 +559,12 @@ class ScaledLoss(YoloLossBase):
magnitude as the model with 3 FPN levels. This helps to prevent gradient magnitude as the model with 3 FPN levels. This helps to prevent gradient
explosions. explosions.
Args:
loss: `tf.float` scalar for the actual loss.
ground_truths: `Dict` holding all the ground truth tensors.
predictions: `Dict` holding all the predicted values.
Returns:
loss: `tf.float` scalar for the scaled loss.
""" """
scale = tf.stop_gradient(3 / len(list(predictions.keys()))) scale = tf.stop_gradient(3 / len(list(predictions.keys())))
return loss * scale return loss * scale
def cross_replica_aggregation(self, loss, num_replicas_in_sync): def cross_replica_aggregation(self, loss, num_replicas_in_sync):
"""In the scaled loss, take the sum of the loss across replicas.""" """this method is not specific to each loss path, but each loss type"""
return loss return loss
...@@ -582,7 +575,6 @@ class YoloLoss: ...@@ -582,7 +575,6 @@ class YoloLoss:
keys, keys,
classes, classes,
anchors, anchors,
masks=None,
path_strides=None, path_strides=None,
truth_thresholds=None, truth_thresholds=None,
ignore_thresholds=None, ignore_thresholds=None,
...@@ -606,8 +598,6 @@ class YoloLoss: ...@@ -606,8 +598,6 @@ class YoloLoss:
anchors: `List[List[int]]` for the anchor boxes that are used in the model anchors: `List[List[int]]` for the anchor boxes that are used in the model
at all levels. For anchor free prediction set the anchor list to be the at all levels. For anchor free prediction set the anchor list to be the
same as the image resolution. same as the image resolution.
masks: `List[int]` for the output level that this specific model output
level
path_strides: `Dict[int]` for how much to scale this level to get the path_strides: `Dict[int]` for how much to scale this level to get the
original input shape for each FPN path. original input shape for each FPN path.
truth_thresholds: `Dict[float]` for the IOU value over which the loss is truth_thresholds: `Dict[float]` for the IOU value over which the loss is
...@@ -649,13 +639,12 @@ class YoloLoss: ...@@ -649,13 +639,12 @@ class YoloLoss:
loss_type = 'scaled' loss_type = 'scaled'
else: else:
loss_type = 'darknet' loss_type = 'darknet'
self._loss_dict = {} self._loss_dict = {}
for key in keys: for key in keys:
self._loss_dict[key] = losses[loss_type]( self._loss_dict[key] = losses[loss_type](
classes=classes, classes=classes,
anchors=anchors, anchors=anchors[key],
mask=masks[key],
truth_thresh=truth_thresholds[key], truth_thresh=truth_thresholds[key],
ignore_thresh=ignore_thresholds[key], ignore_thresh=ignore_thresholds[key],
loss_type=loss_types[key], loss_type=loss_types[key],
...@@ -691,7 +680,7 @@ class YoloLoss: ...@@ -691,7 +680,7 @@ class YoloLoss:
# after computing the loss, scale loss as needed for aggregation # after computing the loss, scale loss as needed for aggregation
# across FPN levels # across FPN levels
loss = self._loss_dict[key].post_path_aggregation( loss = self._loss_dict[key].post_path_aggregation(
loss, ground_truth, predictions) loss, loss_box, loss_conf, loss_class, ground_truth, predictions)
# after completing the scaling of the loss on each replica, handle # after completing the scaling of the loss on each replica, handle
# scaling the loss for merging the loss across replicas # scaling the loss for merging the loss across replicas
......
...@@ -42,10 +42,9 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -42,10 +42,9 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
'5': [1, 13, 13, 255] '5': [1, 13, 13, 255]
} }
classes = 80 classes = 80
masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]} anchors = {'3': [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0]],
anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0], '4': [[46.0, 114.0],[133.0, 127.0], [79.0, 225.0]],
[133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0], '5': [[301.0, 150.0], [172.0, 286.0], [348.0, 340.0]]}
[348.0, 340.0]]
keys = ['3', '4', '5'] keys = ['3', '4', '5']
path_strides = {key: 2**int(key) for key in keys} path_strides = {key: 2**int(key) for key in keys}
...@@ -53,7 +52,6 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -53,7 +52,6 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
keys, keys,
classes, classes,
anchors, anchors,
masks=masks,
path_strides=path_strides, path_strides=path_strides,
truth_thresholds={key: 1.0 for key in keys}, truth_thresholds={key: 1.0 for key in keys},
ignore_thresholds={key: 0.7 for key in keys}, ignore_thresholds={key: 0.7 for key in keys},
......
...@@ -11,7 +11,6 @@ ...@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Contains common building blocks for yolo layer (detection layer).""" """Contains common building blocks for yolo layer (detection layer)."""
import tensorflow as tf import tensorflow as tf
...@@ -26,7 +25,6 @@ class YoloLayer(tf.keras.Model): ...@@ -26,7 +25,6 @@ class YoloLayer(tf.keras.Model):
"""Yolo layer (detection generator).""" """Yolo layer (detection generator)."""
def __init__(self, def __init__(self,
masks,
anchors, anchors,
classes, classes,
iou_thresh=0.0, iou_thresh=0.0,
...@@ -52,8 +50,6 @@ class YoloLayer(tf.keras.Model): ...@@ -52,8 +50,6 @@ class YoloLayer(tf.keras.Model):
"""Parameters for the loss functions used at each detection head output. """Parameters for the loss functions used at each detection head output.
Args: Args:
masks: `List[int]` for the output level that this specific model output
level.
anchors: `List[List[int]]` for the anchor boxes that are used in the anchors: `List[List[int]]` for the anchor boxes that are used in the
model. model.
classes: `int` for the number of classes. classes: `int` for the number of classes.
...@@ -107,7 +103,6 @@ class YoloLayer(tf.keras.Model): ...@@ -107,7 +103,6 @@ class YoloLayer(tf.keras.Model):
**kwargs: Additional keyword arguments. **kwargs: Additional keyword arguments.
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self._masks = masks
self._anchors = anchors self._anchors = anchors
self._thresh = iou_thresh self._thresh = iou_thresh
self._ignore_thresh = ignore_thresh self._ignore_thresh = ignore_thresh
...@@ -127,30 +122,24 @@ class YoloLayer(tf.keras.Model): ...@@ -127,30 +122,24 @@ class YoloLayer(tf.keras.Model):
self._pre_nms_points = pre_nms_points self._pre_nms_points = pre_nms_points
self._label_smoothing = label_smoothing self._label_smoothing = label_smoothing
self._keys = list(masks.keys())
self._keys = list(anchors.keys())
self._len_keys = len(self._keys) self._len_keys = len(self._keys)
self._box_type = box_type self._box_type = box_type
self._path_scale = path_scale or { self._path_scale = path_scale or {key: 2**int(key) for key in self._keys}
key: 2**int(key) for key, _ in masks.items()
}
self._nms_type = nms_type self._nms_type = nms_type
self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()} self._scale_xy = scale_xy or {key: 1.0 for key, _ in anchors.items()}
self._generator = {} self._generator = {}
self._len_mask = {} self._len_mask = {}
for key in self._keys: for key in self._keys:
anchors = [self._anchors[mask] for mask in self._masks[key]] anchors = self._anchors[key]
self._generator[key] = self.get_generators(anchors, self._path_scale[key], # pylint: disable=assignment-from-none self._generator[key] = loss_utils.GridGenerator(
key) anchors, scale_anchors=self._path_scale[key])
self._len_mask[key] = len(self._masks[key]) self._len_mask[key] = len(anchors)
return return
def get_generators(self, anchors, path_scale, path_key):
anchor_generator = loss_utils.GridGenerator(
anchors, scale_anchors=path_scale)
return anchor_generator
def parse_prediction_path(self, key, inputs): def parse_prediction_path(self, key, inputs):
shape_ = tf.shape(inputs) shape_ = tf.shape(inputs)
shape = inputs.get_shape().as_list() shape = inputs.get_shape().as_list()
...@@ -290,7 +279,6 @@ class YoloLayer(tf.keras.Model): ...@@ -290,7 +279,6 @@ class YoloLayer(tf.keras.Model):
keys=self._keys, keys=self._keys,
classes=self._classes, classes=self._classes,
anchors=self._anchors, anchors=self._anchors,
masks=self._masks,
path_strides=self._path_scale, path_strides=self._path_scale,
truth_thresholds=self._truth_thresh, truth_thresholds=self._truth_thresh,
ignore_thresholds=self._ignore_thresh, ignore_thresholds=self._ignore_thresh,
...@@ -309,7 +297,6 @@ class YoloLayer(tf.keras.Model): ...@@ -309,7 +297,6 @@ class YoloLayer(tf.keras.Model):
def get_config(self): def get_config(self):
return { return {
'masks': dict(self._masks),
'anchors': [list(a) for a in self._anchors], 'anchors': [list(a) for a in self._anchors],
'thresh': self._thresh, 'thresh': self._thresh,
'max_boxes': self._max_boxes, 'max_boxes': self._max_boxes,
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
"""Tests for yolo detection generator.""" """Tests for yolo detection generator."""
from official.vision.beta.projects.yolo.ops import anchor
from absl.testing import parameterized from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
...@@ -35,14 +36,13 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -35,14 +36,13 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
'5': [1, 13, 13, 255] '5': [1, 13, 13, 255]
} }
classes = 80 classes = 80
masks = {'3': [0, 1, 2], '4': [3, 4, 5], '5': [6, 7, 8]} anchors = {'3': [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0]],
anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0], '4': [[46.0, 114.0],[133.0, 127.0], [79.0, 225.0]],
[133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0], '5': [[301.0, 150.0], [172.0, 286.0], [348.0, 340.0]]}
[348.0, 340.0]]
box_type = {key: 'scaled' for key in masks.keys()} box_type = {key: 'scaled' for key in anchors.keys()}
layer = dg.YoloLayer( layer = dg.YoloLayer(anchors, classes, box_type=box_type, max_boxes=10)
masks, anchors, classes, box_type=box_type, max_boxes=10)
inputs = {} inputs = {}
for key in input_shape: for key in input_shape:
......
...@@ -1665,7 +1665,13 @@ class DarkRouteProcess(tf.keras.layers.Layer): ...@@ -1665,7 +1665,13 @@ class DarkRouteProcess(tf.keras.layers.Layer):
class Reorg(tf.keras.layers.Layer): class Reorg(tf.keras.layers.Layer):
"""Splits a high resolution image into 4 lower resolution images.
Used in YOLOR to process very high resolution inputs efficiently.
for example an input image of [1280, 1280, 3] will become [640, 640, 12],
the images are sampled in such a way that the spatial resolution is
retained.
"""
def call(self, x, training=None): def call(self, x, training=None):
return tf.concat([x[..., ::2, ::2, :], return tf.concat([x[..., ::2, ::2, :],
x[..., 1::2, ::2, :], x[..., 1::2, ::2, :],
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
import tensorflow as tf import tensorflow as tf
# static base Yolo Models that do not require configuration # static base Yolo Models that do not require configuration
# similar to a backbone model id. # similar to a backbone model id.
......
...@@ -13,12 +13,14 @@ ...@@ -13,12 +13,14 @@
# limitations under the License. # limitations under the License.
"""Yolo Anchor labeler.""" """Yolo Anchor labeler."""
import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.python.ops.gen_math_ops import maximum, minimum
from official.vision.beta.projects.yolo.ops import box_ops from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import preprocessing_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.projects.yolo.ops import loss_utils from official.vision.beta.projects.yolo.ops import loss_utils
INF = 10000000
def get_best_anchor(y_true, def get_best_anchor(y_true,
anchors, anchors,
...@@ -28,15 +30,22 @@ def get_best_anchor(y_true, ...@@ -28,15 +30,22 @@ def get_best_anchor(y_true,
iou_thresh=0.25, iou_thresh=0.25,
best_match_only=False, best_match_only=False,
use_tie_breaker=True): use_tie_breaker=True):
""" """Get the correct anchor that is assoiciated with each box using IOU.
get the correct anchor that is assoiciated with each box using IOU
Args: Args:
y_true: tf.Tensor[] for the list of bounding boxes in the yolo format y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
anchors: list or tensor for the anchor boxes to be used in prediction anchors: list or tensor for the anchor boxes to be used in prediction
found via Kmeans found via Kmeans.
width: int for the image width width: int for the image width.
height: int for the image height height: int for the image height.
iou_thresh: `float` the minimum iou threshold to use for selecting boxes for
each level.
best_match_only: `bool` if the box only has one match and it is less than
the iou threshold, when set to True, this match will be dropped as no
anchors can be linked to it.
use_tie_breaker: `bool` if there are many anchors for a given box, then
attempt to use all of them; if False, only the first matching box will
be used.
Return: Return:
tf.Tensor: y_true with the anchor associated with each ground truth tf.Tensor: y_true with the anchor associated with each ground truth
box known box known
...@@ -46,7 +55,10 @@ def get_best_anchor(y_true, ...@@ -46,7 +55,10 @@ def get_best_anchor(y_true,
height = tf.cast(height, dtype=tf.float32) height = tf.cast(height, dtype=tf.float32)
scaler = tf.convert_to_tensor([width, height]) scaler = tf.convert_to_tensor([width, height])
# scale to the level's output width and height
true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler
# scale down from large anchor to small anchor type
anchors = tf.cast(anchors, dtype=tf.float32)/stride anchors = tf.cast(anchors, dtype=tf.float32)/stride
k = tf.shape(anchors)[0] k = tf.shape(anchors)[0]
...@@ -71,7 +83,6 @@ def get_best_anchor(y_true, ...@@ -71,7 +83,6 @@ def get_best_anchor(y_true,
values = -values values = -values
ind_mask = tf.cast(values < iou_thresh, dtype=indexes.dtype) ind_mask = tf.cast(values < iou_thresh, dtype=indexes.dtype)
else: else:
# iou_raw = box_ops.compute_iou(truth_comp, anchors)
truth_comp = box_ops.xcycwh_to_yxyx(truth_comp) truth_comp = box_ops.xcycwh_to_yxyx(truth_comp)
anchors = box_ops.xcycwh_to_yxyx(anchors) anchors = box_ops.xcycwh_to_yxyx(anchors)
iou_raw = box_ops.aggregated_comparitive_iou( iou_raw = box_ops.aggregated_comparitive_iou(
...@@ -80,7 +91,7 @@ def get_best_anchor(y_true, ...@@ -80,7 +91,7 @@ def get_best_anchor(y_true,
iou_type=3, iou_type=3,
) )
values, indexes = tf.math.top_k( values, indexes = tf.math.top_k(
iou_raw, #tf.transpose(iou_raw, perm=[0, 2, 1]), iou_raw,
k=tf.cast(k, dtype=tf.int32), k=tf.cast(k, dtype=tf.int32),
sorted=True) sorted=True)
ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype) ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype)
...@@ -102,18 +113,73 @@ def get_best_anchor(y_true, ...@@ -102,18 +113,73 @@ def get_best_anchor(y_true,
return tf.cast(iou_index, dtype=tf.float32), tf.cast(values, dtype=tf.float32) return tf.cast(iou_index, dtype=tf.float32), tf.cast(values, dtype=tf.float32)
class YoloAnchorLabeler: class YoloAnchorLabeler:
"""Anchor labeler for the Yolo Models"""
def __init__(self, def __init__(self,
anchors = None, anchors = None,
anchor_free_level_limits = None,
level_strides = None,
center_radius = None,
max_num_instances = 200,
match_threshold = 0.25, match_threshold = 0.25,
best_matches_only = False, best_matches_only = False,
use_tie_breaker = True): use_tie_breaker = True,
darknet = False,
dtype = 'float32'):
"""Initialization for anchor labler.
Args:
anchors: `Dict[List[Union[int, float]]]` values for each anchor box.
anchor_free_level_limits: `List` the box sizes that will be allowed at
each FPN level as is done in the FCOS and YOLOX paper for anchor free
box assignment.
level_strides: `Dict[int]` for how much the model scales down the
images at each level.
center_radius: `Dict[float]` for radius around each box center to search
for extra centers in each level.
max_num_instances: `int` for the number of boxes to compute loss on.
match_threshold: `float` indicating the threshold over which an anchor
will be considered for prediction. At zero, all the anchors will be used
and at 1.0 only the best will be used. For anchor thresholds larger than
1.0 we stop using the IOU for anchor comparison and resort directly to
comparing the width and height; this is used for the scaled models.
best_matches_only: `boolean` indicating how boxes are selected for
optimization.
use_tie_breaker: `boolean` indicating whether to use the anchor threshold
value.
darknet: `boolean` indicating which data pipeline to use. Setting to True
swaps the pipeline to output images relative to Yolov4 and older.
dtype: `str` indicating the output datatype of the data pipeline, selecting
from {"float32", "float16", "bfloat16"}.
"""
self.anchors = anchors self.anchors = anchors
self.masks = self._get_mask() self.masks = self._get_mask()
self.anchor_free_level_limits = self._get_level_limits(
anchor_free_level_limits)
if darknet and self.anchor_free_level_limits is None:
center_radius = None
self.keys = self.anchors.keys()
if self.anchor_free_level_limits is not None:
maxim = 2000
match_threshold = -0.01
self.num_instances = {key: maxim for key in self.keys}
elif not darknet:
self.num_instances = {
key: (6 - i) * max_num_instances for i, key in enumerate(self.keys)}
else:
self.num_instances = {key: max_num_instances for key in self.keys}
self.center_radius = center_radius
self.level_strides = level_strides
self.match_threshold = match_threshold self.match_threshold = match_threshold
self.best_matches_only = best_matches_only self.best_matches_only = best_matches_only
self.use_tie_breaker = use_tie_breaker self.use_tie_breaker = use_tie_breaker
self.dtype = dtype
def _get_mask(self): def _get_mask(self):
"""For each level get indexs of each anchor for box search across levels."""
masks = {} masks = {}
start = 0 start = 0
...@@ -124,8 +190,21 @@ class YoloAnchorLabeler: ...@@ -124,8 +190,21 @@ class YoloAnchorLabeler:
masks[str(i)] = list(range(start, per_scale + start)) masks[str(i)] = list(range(start, per_scale + start))
start += per_scale start += per_scale
return masks return masks
def _get_level_limits(self, level_limits):
  """Build the per-level box size range used for anchor free box placement.

  The boundaries in `level_limits` are padded with 0.0 at the front and
  infinity at the back. Each key of `self.anchors`, in iteration order, then
  receives the consecutive pair of boundaries found at its position.

  Args:
    level_limits: a sequence (list or tuple) of numeric boundaries between
      consecutive levels, or None when no level limits are configured.

  Returns:
    A dict mapping each key of `self.anchors` to a list `[lower, upper]`, or
    None if `level_limits` is None.
  """
  if level_limits is None:
    return None

  # Unpack into a fresh list so that tuples (or any other sequence) are
  # accepted; plain list concatenation raises TypeError for non-list inputs.
  bounds = [0.0, *level_limits, np.inf]

  # NOTE: when fewer than `len(self.anchors) - 1` boundaries are supplied the
  # trailing levels receive a slice with fewer than two entries.
  return {key: bounds[i:i + 2] for i, key in enumerate(self.anchors)}
def _tie_breaking_search(self, anchors, mask, boxes, classes): def _tie_breaking_search(self, anchors, mask, boxes, classes):
"""After search, link each anchor ind to the correct map in ground truth."""
mask = tf.cast(tf.reshape(mask, [1, 1, 1, -1]), anchors.dtype) mask = tf.cast(tf.reshape(mask, [1, 1, 1, -1]), anchors.dtype)
anchors = tf.expand_dims(anchors, axis=-1) anchors = tf.expand_dims(anchors, axis=-1)
viable = tf.where(tf.squeeze(anchors == mask, axis = 0)) viable = tf.where(tf.squeeze(anchors == mask, axis = 0))
...@@ -140,10 +219,12 @@ class YoloAnchorLabeler: ...@@ -140,10 +219,12 @@ class YoloAnchorLabeler:
anchor_id = tf.cast(anchor_id, boxes.dtype) anchor_id = tf.cast(anchor_id, boxes.dtype)
return boxes, classes, anchor_id return boxes, classes, anchor_id
def _get_anchor_id(self, key, boxes, classes, anchors, width, height, stride): def _get_anchor_id(self, key, boxes, classes, width, height, stride,
iou_index = None):
"""Find the object anchor assignments in an anchor based paradigm. """ """Find the object anchor assignments in an anchor based paradigm. """
# find the best anchor # find the best anchor
anchors = self.anchors[key]
num_anchors = len(anchors) num_anchors = len(anchors)
if self.best_matches_only: if self.best_matches_only:
# get the best anchor for each box # get the best anchor for each box
...@@ -153,28 +234,20 @@ class YoloAnchorLabeler: ...@@ -153,28 +234,20 @@ class YoloAnchorLabeler:
iou_thresh=self.match_threshold) iou_thresh=self.match_threshold)
mask = range(num_anchors) mask = range(num_anchors)
else: else:
# stitch and search boxes across fpn levels # search is done across FPN levels, get the mask of anchor indexes
anchorsvec = [] # correlated to this level.
for stitch in self.anchors.keys():
anchorsvec.extend(self.anchors[stitch])
# get the best anchor for each box
iou_index, _ = get_best_anchor(boxes, anchorsvec, stride,
width=width, height=height,
best_match_only=False,
use_tie_breaker=self.use_tie_breaker,
iou_thresh=self.match_threshold)
mask = self.masks[key] mask = self.masks[key]
# search for the correct box to use # search for the correct box to use
(boxes, (boxes, classes, anchors) = self._tie_breaking_search(iou_index, mask,
classes, boxes, classes)
anchors) = self._tie_breaking_search(iou_index, mask, boxes, classes)
return boxes, classes, anchors, num_anchors return boxes, classes, anchors, num_anchors
def _get_centers(self, boxes, classes, anchors, width, height, offset): def _get_centers(self, boxes, classes, anchors, width, height, scale_xy):
"""Find the object center assignments in an anchor based paradigm. """ """Find the object center assignments in an anchor based paradigm. """
grid_xy, wh = tf.split(boxes, 2, axis = -1) offset = tf.cast(0.5 * (scale_xy - 1), boxes.dtype)
grid_xy, _ = tf.split(boxes, 2, axis = -1)
wh_scale = tf.cast(tf.convert_to_tensor([width, height]), boxes.dtype) wh_scale = tf.cast(tf.convert_to_tensor([width, height]), boxes.dtype)
grid_xy = grid_xy * wh_scale grid_xy = grid_xy * wh_scale
...@@ -234,16 +307,16 @@ class YoloAnchorLabeler: ...@@ -234,16 +307,16 @@ class YoloAnchorLabeler:
return boxes, classes, centers return boxes, classes, centers
def _get_anchor_free(self, def _get_anchor_free(self,
key,
boxes, boxes,
classes, classes,
height, height,
width, width,
stride, stride,
fpn_limits, center_radius):
center_radius=2.5): """Find the box assignements in an anchor free paradigm."""
"""Find the box assignements in an anchor free paradigm. """ level_limits = self.anchor_free_level_limits[key]
gen = loss_utils.GridGenerator( gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
masks=None, anchors=[[1, 1]], scale_anchors=stride)
grid_points = gen(width, height, 1, boxes.dtype)[0] grid_points = gen(width, height, 1, boxes.dtype)[0]
grid_points = tf.squeeze(grid_points, axis=0) grid_points = tf.squeeze(grid_points, axis=0)
box_list = boxes box_list = boxes
...@@ -266,10 +339,10 @@ class YoloAnchorLabeler: ...@@ -266,10 +339,10 @@ class YoloAnchorLabeler:
b_b = tlbr_boxes[..., 2] - y_centers b_b = tlbr_boxes[..., 2] - y_centers
b_r = tlbr_boxes[..., 3] - x_centers b_r = tlbr_boxes[..., 3] - x_centers
box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1) box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
if fpn_limits is not None: if level_limits is not None:
max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1) max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
gt_min = max_reg_targets_per_im >= fpn_limits[0] gt_min = max_reg_targets_per_im >= level_limits[0]
gt_max = max_reg_targets_per_im <= fpn_limits[1] gt_max = max_reg_targets_per_im <= level_limits[1]
is_in_boxes = tf.logical_and(gt_min, gt_max) is_in_boxes = tf.logical_and(gt_min, gt_max)
else: else:
is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0 is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
...@@ -290,11 +363,10 @@ class YoloAnchorLabeler: ...@@ -290,11 +363,10 @@ class YoloAnchorLabeler:
is_in_boxes_and_center = tf.logical_and(is_in_index, is_in_boxes_and_center) is_in_boxes_and_center = tf.logical_and(is_in_index, is_in_boxes_and_center)
if self.use_tie_breaker: if self.use_tie_breaker:
inf = 10000000
boxes_all = tf.cast(is_in_boxes_and_center, area.dtype) boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
boxes_all = ((boxes_all * area) + ((1 - boxes_all) * inf)) boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
boxes_min = tf.reduce_min(boxes_all, axis = -1, keepdims = True) boxes_min = tf.reduce_min(boxes_all, axis = -1, keepdims = True)
boxes_min = tf.where(boxes_min == inf, -1.0, boxes_min) boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
is_in_boxes_and_center = boxes_all == boxes_min is_in_boxes_and_center = boxes_all == boxes_min
# construct the index update grid # construct the index update grid
...@@ -314,33 +386,60 @@ class YoloAnchorLabeler: ...@@ -314,33 +386,60 @@ class YoloAnchorLabeler:
indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1) indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
return indexes, samples return indexes, samples
def __call__(self, def build_label_per_path(self,
key, key,
boxes, boxes,
classes, classes,
anchors, width,
width, height,
height, iou_index = None):
stride, """Builds the labels for one path."""
scale_xy, stride = self.level_strides[key]
num_instances, scale_xy = self.center_radius[key] if self.center_radius is not None else 1
fpn_limits = None):
width = tf.cast(width//stride, boxes.dtype)
height = tf.cast(height//stride, boxes.dtype)
if self.anchor_free_level_limits is None:
(boxes, classes,
anchors, num_anchors) = self._get_anchor_id(key, boxes, classes,
width, height, stride,
iou_index = iou_index)
boxes, classes, centers = self._get_centers(boxes, classes, anchors,
width, height, scale_xy)
ind_mask = tf.ones_like(classes)
updates = tf.concat([boxes, ind_mask, classes], axis = -1)
else:
num_anchors = 1
(centers, updates) = self._get_anchor_free(key, boxes, classes, height,
width, stride, scale_xy)
boxes, ind_mask, classes = tf.split(updates, [4, 1, 1], axis = -1)
width = tf.cast(width, tf.int32)
height = tf.cast(height, tf.int32)
full = tf.zeros([height, width, num_anchors, 1], dtype=classes.dtype)
full = tf.tensor_scatter_nd_add(full, centers, ind_mask)
num_instances = int(self.num_instances[key])
centers = preprocessing_ops.pad_max_instances(
centers, num_instances, pad_value=0, pad_axis=0)
updates = preprocessing_ops.pad_max_instances(
updates, num_instances, pad_value=0, pad_axis=0)
updates = tf.cast(updates, self.dtype)
full = tf.cast(full, self.dtype)
return centers, updates, full
def __call__(self, boxes, classes, width, height):
"""Builds the labels for a single image, not functional in batch mode. """Builds the labels for a single image, not functional in batch mode.
Args: Args:
boxes: `Tensor` of shape [None, 4] indicating the object locations in boxes: `Tensor` of shape [None, 4] indicating the object locations in
an image. an image.
classes: `Tensor` of shape [None] indicating each object's class. classes: `Tensor` of shape [None] indicating each object's class.
anchors: `List[List[int, float]]` representing the anchor boxes to build
the model against.
width: `int` for the images width. width: `int` for the images width.
height: `int` for the images height. height: `int` for the images height.
stride: `int` for how much the image gets scaled at this level.
scale_xy: `float` for the center shifts to apply when finding center
assignments for a box.
num_instances: `int` for the maximum number of expanded boxes to allow. num_instances: `int` for the maximum number of expanded boxes to allow.
fpn_limits: `List[int]` given no anchor boxes this is used to limit the
boxes assied to the each fpn level based on the levels receptive feild.
Returns: Returns:
centers: `Tensor` of shape [None, 3] of indexes in the final grid where centers: `Tensor` of shape [None, 3] of indexes in the final grid where
...@@ -349,35 +448,27 @@ class YoloAnchorLabeler: ...@@ -349,35 +448,27 @@ class YoloAnchorLabeler:
full: `Tensor` of [width/stride, height/stride, num_anchors, 1] holding full: `Tensor` of [width/stride, height/stride, num_anchors, 1] holding
a mask of where boxes are located for confidence losses. a mask of where boxes are located for confidence losses.
""" """
boxes = box_ops.yxyx_to_xcycwh(boxes) indexes = {}
updates = {}
true_grids = {}
iou_index = None
width //= stride boxes = box_ops.yxyx_to_xcycwh(boxes)
height //= stride if not self.best_matches_only and self.anchor_free_level_limits is None:
width = tf.cast(width, boxes.dtype) # stitch and search boxes across fpn levels
height = tf.cast(height, boxes.dtype) anchorsvec = []
for stitch in self.anchors:
if fpn_limits is None: anchorsvec.extend(self.anchors[stitch])
offset = tf.cast(0.5 * (scale_xy - 1), boxes.dtype)
(boxes, classes,
anchors, num_anchors) = self._get_anchor_id(key, boxes, classes, anchors,
width, height, stride)
boxes, classes, centers = self._get_centers(boxes, classes, anchors,
width, height, offset)
ind_mask = tf.ones_like(classes)
updates = tf.concat([boxes, ind_mask, classes], axis = -1)
else:
(centers, updates) = self._get_anchor_free(boxes, classes, height,
width, stride, fpn_limits)
boxes, ind_mask, classes = tf.split(updates, [4, 1, 1], axis = -1)
num_anchors = 1
stride = tf.cast([width, height], boxes.dtype)
# get the best anchor for each box
iou_index, _ = get_best_anchor(boxes, anchorsvec, stride,
width=1.0, height=1.0,
best_match_only=False,
use_tie_breaker=self.use_tie_breaker,
iou_thresh=self.match_threshold)
width = tf.cast(width, tf.int32) for key in self.keys:
height = tf.cast(height, tf.int32) indexes[key], updates[key], true_grids[key] = self.build_label_per_path(
full = tf.zeros([height, width, num_anchors, 1], dtype=classes.dtype) key, boxes, classes, width, height, iou_index = iou_index)
full = tf.tensor_scatter_nd_add(full, centers, ind_mask) return indexes, updates, true_grids
centers = preprocessing_ops.pad_max_instances( \ No newline at end of file
centers, int(num_instances), pad_value=0, pad_axis=0)
updates = preprocessing_ops.pad_max_instances(
updates, int(num_instances), pad_value=0, pad_axis=0)
return centers, updates, full
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
"""Yolo loss utility functions.""" """Yolo loss utility functions."""
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
...@@ -129,6 +130,10 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None): ...@@ -129,6 +130,10 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
indexes = apply_mask(tf.cast(ind_mask, indexes.dtype), indexes) indexes = apply_mask(tf.cast(ind_mask, indexes.dtype), indexes)
indexes = (indexes + (ind_mask - 1)) indexes = (indexes + (ind_mask - 1))
# mask truths
truths = apply_mask(tf.cast(ind_mask, truths.dtype), truths)
truths = (truths + (tf.cast(ind_mask, truths.dtype) - 1))
# reshape the indexes into the correct shape for the loss, # reshape the indexes into the correct shape for the loss,
# just flatten all indexes but the last # just flatten all indexes but the last
indexes = tf.reshape(indexes, [-1, 4]) indexes = tf.reshape(indexes, [-1, 4])
...@@ -157,26 +162,16 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None): ...@@ -157,26 +162,16 @@ def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
class GridGenerator: class GridGenerator:
"""Grid generator that generates anchor grids for box decoding.""" """Grid generator that generates anchor grids for box decoding."""
def __init__(self, anchors, masks=None, scale_anchors=None): def __init__(self, anchors, scale_anchors=None):
"""Initialize Grid Generator. """Initialize Grid Generator.
Args: Args:
anchors: A `List[List[int]]` for the anchor boxes that are used in the anchors: A `List[List[int]]` for the anchor boxes that are used in the
model at all levels. model at all levels.
masks: A `List[int]` for the output level that this specific model output
Level.
scale_anchors: An `int` for how much to scale this level to get the scale_anchors: An `int` for how much to scale this level to get the
original input shape. original input shape.
""" """
self.dtype = tf.keras.backend.floatx() self.dtype = tf.keras.backend.floatx()
if masks is not None:
self._num = len(masks)
else:
self._num = tf.shape(anchors)[0]
if masks is not None:
anchors = [anchors[mask] for mask in masks]
self._scale_anchors = scale_anchors self._scale_anchors = scale_anchors
self._anchors = tf.convert_to_tensor(anchors) self._anchors = tf.convert_to_tensor(anchors)
return return
...@@ -331,18 +326,10 @@ class PairWiseSearch: ...@@ -331,18 +326,10 @@ class PairWiseSearch:
pred_classes, pred_classes,
boxes, boxes,
classes, classes,
scale=None,
yxyx=True,
clip_thresh=0.0): clip_thresh=0.0):
num_boxes = tf.shape(boxes)[-2] num_boxes = tf.shape(boxes)[-2]
num_tiles = (num_boxes // TILE_SIZE) - 1 num_tiles = (num_boxes // TILE_SIZE) - 1
if yxyx:
boxes = box_ops.yxyx_to_xcycwh(boxes)
if scale is not None:
boxes = boxes * tf.stop_gradient(scale)
if self._min_conf > 0.0: if self._min_conf > 0.0:
pred_classes = tf.cast(pred_classes > self._min_conf, pred_classes.dtype) pred_classes = tf.cast(pred_classes > self._min_conf, pred_classes.dtype)
...@@ -540,7 +527,6 @@ def _anchor_free_scale_boxes(encoded_boxes, ...@@ -540,7 +527,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
height, height,
stride, stride,
grid_points, grid_points,
scale_xy,
darknet=False): darknet=False):
"""Decode models boxes using FPN stride under anchor free conditions.""" """Decode models boxes using FPN stride under anchor free conditions."""
# split the boxes # split the boxes
...@@ -549,7 +535,6 @@ def _anchor_free_scale_boxes(encoded_boxes, ...@@ -549,7 +535,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
# build a scaling tensor to get the offset of the box relative to the image # build a scaling tensor to get the offset of the box relative to the image
scaler = tf.convert_to_tensor([height, width, height, width]) scaler = tf.convert_to_tensor([height, width, height, width])
scale_xy = tf.cast(scale_xy, encoded_boxes.dtype)
scale_down = lambda x, y: x / y scale_down = lambda x, y: x / y
scale_up = lambda x, y: x * y scale_up = lambda x, y: x * y
...@@ -557,10 +542,6 @@ def _anchor_free_scale_boxes(encoded_boxes, ...@@ -557,10 +542,6 @@ def _anchor_free_scale_boxes(encoded_boxes,
scale_down = tf.grad_pass_through(scale_down) scale_down = tf.grad_pass_through(scale_down)
scale_up = tf.grad_pass_through(scale_up) scale_up = tf.grad_pass_through(scale_up)
# scale the centers and find the offset of each box relative to
# their center pixel
pred_xy = pred_xy * scale_xy - 0.5 * (scale_xy - 1)
# scale the offsets and add them to the grid points or a tensor that is # scale the offsets and add them to the grid points or a tensor that is
# the relative location of each pixel # the relative location of each pixel
box_xy = (grid_points + pred_xy) box_xy = (grid_points + pred_xy)
...@@ -624,7 +605,7 @@ def get_predicted_box(width, ...@@ -624,7 +605,7 @@ def get_predicted_box(width,
if box_type == 'anchor_free': if box_type == 'anchor_free':
(scaler, scaled_box, (scaler, scaled_box,
pred_box) = _anchor_free_scale_boxes(encoded_boxes, width, height, stride, pred_box) = _anchor_free_scale_boxes(encoded_boxes, width, height, stride,
grid_points, scale_xy, darknet=darknet) grid_points, darknet=darknet)
elif darknet: elif darknet:
# pylint:disable=unbalanced-tuple-unpacking # pylint:disable=unbalanced-tuple-unpacking
......
...@@ -17,7 +17,7 @@ import random ...@@ -17,7 +17,7 @@ import random
import tensorflow as tf import tensorflow as tf
import tensorflow_addons as tfa import tensorflow_addons as tfa
from yolo.ops import preprocessing_ops from official.vision.beta.projects.yolo.ops import preprocessing_ops
from official.vision.beta.ops import box_ops from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops from official.vision.beta.ops import preprocess_ops
...@@ -396,4 +396,3 @@ class Mosaic: ...@@ -396,4 +396,3 @@ class Mosaic:
return self._apply return self._apply
else: else:
return self._skip return self._skip
\ No newline at end of file
...@@ -4,8 +4,6 @@ import random ...@@ -4,8 +4,6 @@ import random
import os import os
import tensorflow_addons as tfa import tensorflow_addons as tfa
from official.vision.beta.projects.yolo.ops import box_ops
from official.vision.beta.projects.yolo.ops import loss_utils
from official.vision.beta.ops import box_ops as bbox_ops from official.vision.beta.ops import box_ops as bbox_ops
PAD_VALUE = 114 PAD_VALUE = 114
...@@ -122,6 +120,11 @@ def pad_max_instances(value, instances, pad_value=0, pad_axis=0): ...@@ -122,6 +120,11 @@ def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0) nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0)
pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype)) pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype))
value = tf.concat([value, pad_tensor], axis=pad_axis) value = tf.concat([value, pad_tensor], axis=pad_axis)
if isinstance(instances, int):
vshape = value.get_shape().as_list()
vshape[pad_axis] = instances
value.set_shape(vshape)
return value return value
...@@ -317,10 +320,43 @@ def resize_and_jitter_image(image, ...@@ -317,10 +320,43 @@ def resize_and_jitter_image(image,
cut=None, cut=None,
method=tf.image.ResizeMethod.BILINEAR, method=tf.image.ResizeMethod.BILINEAR,
seed=None): seed=None):
"""WIP""" """Resize, Pad, and distort a given input image.
Args:
image: a `Tensor` of shape [height, width, 3] representing an image.
desired_size: a `Tensor` or `int` list/tuple of two elements representing
[height, width] of the desired actual output image size.
jitter: an `int` representing the maximum jittering that can be applied to
the image.
letter_box: a `bool` representing if letterboxing should be applied.
random_pad: a `bool` representing if random padding should be applied.
crop_only: a `bool` representing if only cropping will be applied.
shiftx: a `float` indicating if the image is in the
left or right.
shifty: a `float` value indicating if the image is in the
top or bottom.
cut: a `float` value indicating the desired center of the final patched
image.
method: function to resize input image to scaled image.
seed: seed for random scale jittering.
Returns:
image_: a `Tensor` of shape [height, width, 3] where [height, width]
equals to `desired_size`.
infos: a 2D `Tensor` that encodes the information of the image and the
applied preprocessing. It is in the format of
[[original_height, original_width], [desired_height, desired_width],
[y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
desired_width] is the actual scaled image size, and [y_scale, x_scale] is
the scaling factor, which is the ratio of
scaled dimension / original dimension.
cast([original_width, original_height, width, height, ptop, pleft, pbottom,
pright], tf.float32): a `Tensor` containing the information of the image
and the applied preprocessing.
"""
def intersection(a, b): def intersection(a, b):
"""Find the intersection of 2 crop boxes.""" """Find the intersection between 2 crops"""
minx = tf.maximum(a[0], b[0]) minx = tf.maximum(a[0], b[0])
miny = tf.maximum(a[1], b[1]) miny = tf.maximum(a[1], b[1])
maxx = tf.minimum(a[2], b[2]) maxx = tf.minimum(a[2], b[2])
...@@ -328,11 +364,10 @@ def resize_and_jitter_image(image, ...@@ -328,11 +364,10 @@ def resize_and_jitter_image(image,
return tf.convert_to_tensor([minx, miny, maxx, maxy]) return tf.convert_to_tensor([minx, miny, maxx, maxy])
def cast(values, dtype): def cast(values, dtype):
"""Cast a list of items to a givne data type to reduce lines of code"""
return [tf.cast(value, dtype) for value in values] return [tf.cast(value, dtype) for value in values]
if jitter > 0.5 or jitter < 0: if jitter > 0.5 or jitter < 0:
raise Exception("maximum change in aspect ratio must be between 0 and 0.5") raise Exception('maximum change in aspect ratio must be between 0 and 0.5')
with tf.name_scope('resize_and_jitter_image'): with tf.name_scope('resize_and_jitter_image'):
# Cast all parameters to a usable float data type. # Cast all parameters to a usable float data type.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment