Commit e528aa76 authored by Vishnu Banna's avatar Vishnu Banna
Browse files

detection generator update

parent 804d6abc
...@@ -14,8 +14,10 @@ ...@@ -14,8 +14,10 @@
"""Contains common building blocks for yolo layer (detection layer).""" """Contains common building blocks for yolo layer (detection layer)."""
import tensorflow as tf import tensorflow as tf
from official.vision.beta.modeling.layers import detection_generator
from official.vision.beta.projects.yolo.ops import box_ops from official.vision.beta.projects.yolo.ops import (loss_utils, box_ops)
from official.vision.beta.projects.yolo.losses import yolo_loss
@tf.keras.utils.register_keras_serializable(package='yolo') @tf.keras.utils.register_keras_serializable(package='yolo')
...@@ -36,11 +38,11 @@ class YoloLayer(tf.keras.Model): ...@@ -36,11 +38,11 @@ class YoloLayer(tf.keras.Model):
cls_normalizer=1.0, cls_normalizer=1.0,
obj_normalizer=1.0, obj_normalizer=1.0,
use_scaled_loss=False, use_scaled_loss=False,
darknet=None, update_on_repeat=False,
pre_nms_points=5000, pre_nms_points=5000,
label_smoothing=0.0, label_smoothing=0.0,
max_boxes=200, max_boxes=200,
new_cords=False, box_type='original',
path_scale=None, path_scale=None,
scale_xy=None, scale_xy=None,
nms_type='greedy', nms_type='greedy',
...@@ -91,18 +93,6 @@ class YoloLayer(tf.keras.Model): ...@@ -91,18 +93,6 @@ class YoloLayer(tf.keras.Model):
objectness_smooth: `float` for how much to smooth the loss on the objectness_smooth: `float` for how much to smooth the loss on the
detection map. detection map.
**kwargs: Additional keyword arguments. **kwargs: Additional keyword arguments.
Return:
loss: `float` for the actual loss.
box_loss: `float` loss on the boxes used for metrics.
conf_loss: `float` loss on the confidence used for metrics.
class_loss: `float` loss on the classes used for metrics.
avg_iou: `float` metric for the average iou between predictions
and ground truth.
avg_obj: `float` metric for the average confidence of the model
for predictions.
recall50: `float` metric for how accurate the model is.
precision50: `float` metric for how precise the model is.
""" """
super().__init__(**kwargs) super().__init__(**kwargs)
self._masks = masks self._masks = masks
...@@ -121,29 +111,18 @@ class YoloLayer(tf.keras.Model): ...@@ -121,29 +111,18 @@ class YoloLayer(tf.keras.Model):
self._loss_type = loss_type self._loss_type = loss_type
self._use_scaled_loss = use_scaled_loss self._use_scaled_loss = use_scaled_loss
self._darknet = darknet self._update_on_repeat = update_on_repeat
self._pre_nms_points = pre_nms_points self._pre_nms_points = pre_nms_points
self._label_smoothing = label_smoothing self._label_smoothing = label_smoothing
self._keys = list(masks.keys()) self._keys = list(masks.keys())
self._len_keys = len(self._keys) self._len_keys = len(self._keys)
self._new_cords = new_cords self._box_type = box_type
self._path_scale = path_scale or { self._path_scale = path_scale or {
key: 2**int(key) for key, _ in masks.items() key: 2**int(key) for key, _ in masks.items()
} }
self._nms_types = { self._nms_type = nms_type
'greedy': 1,
'iou': 2,
'giou': 3,
'ciou': 4,
'diou': 5,
'class_independent': 6,
'weighted_diou': 7
}
self._nms_type = self._nms_types[nms_type]
self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()} self._scale_xy = scale_xy or {key: 1.0 for key, _ in masks.items()}
self._generator = {} self._generator = {}
...@@ -156,27 +135,33 @@ class YoloLayer(tf.keras.Model): ...@@ -156,27 +135,33 @@ class YoloLayer(tf.keras.Model):
return return
def get_generators(self, anchors, path_scale, path_key): def get_generators(self, anchors, path_scale, path_key):
return None anchor_generator = loss_utils.GridGenerator(
anchors, scale_anchors=path_scale)
def rm_nan_inf(self, x, val=0.0): return anchor_generator
x = tf.where(tf.math.is_nan(x), tf.cast(val, dtype=x.dtype), x)
x = tf.where(tf.math.is_inf(x), tf.cast(val, dtype=x.dtype), x)
return x
def parse_prediction_path(self, key, inputs): def parse_prediction_path(self, key, inputs):
shape_ = tf.shape(inputs)
shape = inputs.get_shape().as_list() shape = inputs.get_shape().as_list()
height, width = shape[1], shape[2] batchsize, height, width = shape_[0], shape[1], shape[2]
if height is None or width is None:
height, width = shape_[1], shape_[2]
generator = self._generator[key]
len_mask = self._len_mask[key] len_mask = self._len_mask[key]
scale_xy = self._scale_xy[key]
# reshape the yolo output to (batchsize, # reshape the yolo output to (batchsize,
# width, # width,
# height, # height,
# number_anchors, # number_anchors,
# remaining_points) # remaining_points)
data = tf.reshape(inputs, [-1, height, width, len_mask, self._classes + 5]) data = tf.reshape(inputs, [-1, height, width, len_mask, self._classes + 5])
# use the grid generator to get the formatted anchor boxes and grid points
# in shape [1, height, width, 2]
centers, anchors = generator(height, width, batchsize, dtype=data.dtype)
# split the yolo detections into boxes, object score map, classes # split the yolo detections into boxes, object score map, classes
boxes, obns_scores, class_scores = tf.split( boxes, obns_scores, class_scores = tf.split(
data, [4, 1, self._classes], axis=-1) data, [4, 1, self._classes], axis=-1)
...@@ -184,25 +169,32 @@ class YoloLayer(tf.keras.Model): ...@@ -184,25 +169,32 @@ class YoloLayer(tf.keras.Model):
# determine the number of classes # determine the number of classes
classes = class_scores.get_shape().as_list()[-1] classes = class_scores.get_shape().as_list()[-1]
# configurable to use the new coordinates in scaled Yolo v4 or not
_, _, boxes = loss_utils.get_predicted_box(
tf.cast(height, data.dtype),
tf.cast(width, data.dtype),
boxes,
anchors,
centers,
scale_xy,
stride=self._path_scale[key],
darknet=False,
box_type=self._box_type[key])
# convert boxes from yolo(x, y, w, h) to tensorflow(ymin, xmin, ymax, xmax) # convert boxes from yolo(x, y, w, h) to tensorflow(ymin, xmin, ymax, xmax)
boxes = box_ops.xcycwh_to_yxyx(boxes) boxes = box_ops.xcycwh_to_yxyx(boxes)
# activate and detection map # activate and detection map
obns_scores = tf.math.sigmoid(obns_scores) obns_scores = tf.math.sigmoid(obns_scores)
# threshold the detection map
obns_mask = tf.cast(obns_scores > self._thresh, obns_scores.dtype)
# convert detection map to class detection probabilities # convert detection map to class detection probabilities
class_scores = tf.math.sigmoid(class_scores) * obns_mask * obns_scores class_scores = tf.math.sigmoid(class_scores) * obns_scores
class_scores *= tf.cast(class_scores > self._thresh, class_scores.dtype)
fill = height * width * len_mask
# flatten predictions to [batchsize, N, -1] for non max suppression # flatten predictions to [batchsize, N, -1] for non max suppression
fill = height * width * len_mask
boxes = tf.reshape(boxes, [-1, fill, 4]) boxes = tf.reshape(boxes, [-1, fill, 4])
class_scores = tf.reshape(class_scores, [-1, fill, classes]) class_scores = tf.reshape(class_scores, [-1, fill, classes])
obns_scores = tf.reshape(obns_scores, [-1, fill]) obns_scores = tf.reshape(obns_scores, [-1, fill])
return obns_scores, boxes, class_scores return obns_scores, boxes, class_scores
def call(self, inputs): def call(self, inputs):
...@@ -224,26 +216,49 @@ class YoloLayer(tf.keras.Model): ...@@ -224,26 +216,49 @@ class YoloLayer(tf.keras.Model):
# collate all predictions # collate all predictions
boxes = tf.concat(boxes, axis=1) boxes = tf.concat(boxes, axis=1)
object_scores = tf.keras.backend.concatenate(object_scores, axis=1) object_scores = tf.concat(object_scores, axis=1)
class_scores = tf.keras.backend.concatenate(class_scores, axis=1) class_scores = tf.concat(class_scores, axis=1)
# greedy NMS # get masks to threshold all the predictions
boxes = tf.cast(boxes, dtype=tf.float32) object_mask = tf.cast(object_scores > self._thresh, object_scores.dtype)
class_scores = tf.cast(class_scores, dtype=tf.float32) class_mask = tf.cast(class_scores > self._thresh, class_scores.dtype)
nms_items = tf.image.combined_non_max_suppression(
tf.expand_dims(boxes, axis=-2), # apply thresholds mask to all the predicitons
class_scores, object_scores *= object_mask
self._pre_nms_points, class_scores *= (tf.expand_dims(object_mask, axis=-1) * class_mask)
self._max_boxes,
iou_threshold=self._nms_thresh, # apply nms
score_threshold=self._thresh) if self._nms_type == 'greedy':
# cast the boxes and predictions back to original datatype # greedy NMS
boxes = tf.cast(nms_items.nmsed_boxes, object_scores.dtype) boxes = tf.cast(boxes, dtype=tf.float32)
class_scores = tf.cast(nms_items.nmsed_classes, object_scores.dtype) class_scores = tf.cast(class_scores, dtype=tf.float32)
object_scores = tf.cast(nms_items.nmsed_scores, object_scores.dtype) boxes, object_scores_, class_scores, num_detections = (
tf.image.combined_non_max_suppression(
# compute the number of valid detections tf.expand_dims(boxes, axis=-2),
num_detections = tf.math.reduce_sum(tf.math.ceil(object_scores), axis=-1) class_scores,
self._pre_nms_points,
self._max_boxes,
iou_threshold=self._nms_thresh,
score_threshold=self._thresh))
# cast the boxes and predictions back to original datatype
boxes = tf.cast(boxes, object_scores.dtype)
class_scores = tf.cast(class_scores, object_scores.dtype)
object_scores = tf.cast(object_scores_, object_scores.dtype)
else:
# TPU NMS
boxes = tf.cast(boxes, dtype=tf.float32)
class_scores = tf.cast(class_scores, dtype=tf.float32)
(boxes, confidence,
classes, num_detections) = detection_generator._generate_detections_v2(
tf.expand_dims(boxes, axis=-2),
class_scores,
pre_nms_top_k=self._pre_nms_points,
max_num_detections=self._max_boxes,
nms_iou_threshold=self._nms_thresh,
pre_nms_score_threshold=self._thresh)
boxes = tf.cast(boxes, object_scores.dtype)
class_scores = tf.cast(classes, object_scores.dtype)
object_scores = tf.cast(confidence, object_scores.dtype)
# format and return # format and return
return { return {
...@@ -255,12 +270,31 @@ class YoloLayer(tf.keras.Model): ...@@ -255,12 +270,31 @@ class YoloLayer(tf.keras.Model):
@property @property
def losses(self): def losses(self):
"""Generates a dictionary of losses to apply to each path. """ Generates a dictionary of losses to apply to each path
Done in the detection generator because all parameters are the same Done in the detection generator because all parameters are the same
across both loss and detection generator. across both loss and detection generator
""" """
return None loss = yolo_loss.YoloLoss(
keys=self._keys,
classes=self._classes,
anchors=self._anchors,
masks=self._masks,
path_strides=self._path_scale,
truth_thresholds=self._truth_thresh,
ignore_thresholds=self._ignore_thresh,
loss_types=self._loss_type,
iou_normalizers=self._iou_normalizer,
cls_normalizers=self._cls_normalizer,
obj_normalizers=self._obj_normalizer,
objectness_smooths=self._objectness_smooth,
box_types=self._box_type,
max_deltas=self._max_delta,
scale_xys=self._scale_xy,
use_scaled_loss=self._use_scaled_loss,
update_on_repeat=self._update_on_repeat,
label_smoothing=self._label_smoothing)
return loss
def get_config(self): def get_config(self):
return { return {
......
...@@ -39,10 +39,16 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -39,10 +39,16 @@ class YoloDecoderTest(parameterized.TestCase, tf.test.TestCase):
anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0], anchors = [[12.0, 19.0], [31.0, 46.0], [96.0, 54.0], [46.0, 114.0],
[133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0], [133.0, 127.0], [79.0, 225.0], [301.0, 150.0], [172.0, 286.0],
[348.0, 340.0]] [348.0, 340.0]]
layer = dg.YoloLayer(masks, anchors, classes, max_boxes=10) box_type = {key:"scaled" for key in masks.keys()}
layer = dg.YoloLayer(masks,
anchors,
classes,
box_type = box_type,
max_boxes=10)
inputs = {} inputs = {}
for key in input_shape: for key in input_shape.keys():
inputs[key] = tf.ones(input_shape[key], dtype=tf.float32) inputs[key] = tf.ones(input_shape[key], dtype=tf.float32)
endpoints = layer(inputs) endpoints = layer(inputs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment