Commit b0ccdb11 authored by Shixin Luo

resolve conflict with master

parents e61588cd 1611a8c5
use_local_features: true
use_global_features: true
model_path: "parameters/r101delg_gldv2clean_20200914"
image_scales: 0.25
image_scales: 0.35355338
image_scales: 0.5
image_scales: 0.70710677
image_scales: 1.0
image_scales: 1.4142135
image_scales: 2.0
delf_local_config {
use_pca: false
max_feature_num: 1000
score_threshold: 357.48
}
delf_global_config {
use_pca: false
image_scales_ind: 3
image_scales_ind: 4
image_scales_ind: 5
}
max_image_size: 1024
use_local_features: true
use_global_features: true
model_path: "parameters/r50delg_gldv2clean_20200914"
image_scales: 0.25
image_scales: 0.35355338
image_scales: 0.5
image_scales: 0.70710677
image_scales: 1.0
image_scales: 1.4142135
image_scales: 2.0
delf_local_config {
use_pca: false
max_feature_num: 1000
score_threshold: 454.6
}
delf_global_config {
use_pca: false
image_scales_ind: 3
image_scales_ind: 4
image_scales_ind: 5
}
max_image_size: 1024
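For context, a `DelfConfig` text proto like the two above is what the extractor consumes. The seven `image_scales` form the geometric sequence 2^(k/2) for k = -4..2, and `image_scales_ind: 3`, `4`, `5` select the 1/√2, 1.0, and √2 scales for pooling the global descriptor. Below is a minimal loading sketch, assuming the `delf` package and its `delf_config_pb2` module are importable; the filename is a placeholder:
```python
# Sketch: parse a DELG extraction config such as the ones above.
from google.protobuf import text_format
from delf import delf_config_pb2  # assumed available from the DELF codebase

config = delf_config_pb2.DelfConfig()
with open('r50delg_gldv2clean_config.pbtxt', 'r') as f:
  text_format.Parse(f.read(), config)

print(config.model_path)                  # e.g. parameters/r50delg_gldv2clean_20200914
print(list(config.image_scales))          # the seven powers of sqrt(2)
print(list(config.delf_global_config.image_scales_ind))  # [3, 4, 5]
```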
@@ -180,18 +180,17 @@ def MakeExtractor(config):
if hasattr(config, 'is_tf2_exported') and config.is_tf2_exported:
predict = model.signatures['serving_default']
if config.use_local_features and config.use_global_features:
if config.use_global_features:
output_dict = predict(
input_image=image_tensor,
input_scales=image_scales_tensor,
input_max_feature_num=max_feature_num_tensor,
input_abs_thres=score_threshold_tensor,
input_global_scales_ind=global_scales_ind_tensor)
output = [
output_dict['boxes'], output_dict['features'],
output_dict['scales'], output_dict['scores'],
output_dict['global_descriptors']
]
output_dict = predict(
input_image=image_tensor,
input_scales=image_scales_tensor,
input_max_feature_num=max_feature_num_tensor,
input_abs_thres=score_threshold_tensor,
input_global_scales_ind=global_scales_ind_tensor)
output = [
output_dict['boxes'], output_dict['features'],
output_dict['scales'], output_dict['scores'],
output_dict['global_descriptors']
]
elif config.use_local_features:
output_dict = predict(
input_image=image_tensor,
......
@@ -143,6 +143,8 @@ curl -Os http://storage.googleapis.com/delf/resnet50_imagenet_weights.tar.gz
tar -xzvf resnet50_imagenet_weights.tar.gz
```
### Training with Local Features
Assuming the TFRecord files were generated in the `gldv2_dataset/tfrecord/`
directory, running the following command should start training a model and
output the results in the `gldv2_training` directory:
@@ -156,13 +158,7 @@ python3 train.py \
--logdir=gldv2_training/
```
On a multi-GPU machine the batch size can be increased to speed up training
using the `--batch_size` parameter. On a machine with 8 Tesla P100 GPUs you
can set the batch size to `256`:
```
--batch_size=256
```
### Training with Local and Global Features
It is also possible to train the model with an improved global features head as
introduced in the [DELG paper](https://arxiv.org/abs/2001.05027). To do this,
@@ -179,6 +175,15 @@ python3 train.py \
--delg_global_features
```
### Hyperparameter Guidelines
To improve training convergence, the following hyperparameter values have
been tested and validated on the infrastructures below, with the remaining
`train.py` flags keeping their **default values**:
* 8 Tesla P100 GPUs: `--batch_size=256`, `--initial_lr=0.01`
* 4 Tesla P100 GPUs: `--batch_size=128`, `--initial_lr=0.005`
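The two configurations above follow a linear learning-rate scaling rule: halving the total batch size halves `--initial_lr`. As a hedged example, a 4-GPU run would add these flags to the training command shown earlier, leaving everything else at its default:
```
python3 train.py \
  --batch_size=128 \
  --initial_lr=0.005 \
  --logdir=gldv2_training/
```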
*NOTE*: The autoencoder described in the DELG paper is not yet implemented
in this codebase; we are working on adding it. Stay tuned!
......
@@ -76,7 +76,7 @@ documentation of the Object Detection API:
### Mobile Inference for TF2 models
TF2 OD API models can now be converted to TensorFlow Lite! Only SSD models are
currently supported. See <a href='running_on_mobile_tf2.md'>documentation</a>.
currently supported. See <a href='g3doc/running_on_mobile_tf2.md'>documentation</a>.
**Thanks to contributors**: Sachin Joglekar
......
@@ -50,6 +50,7 @@ from object_detection.utils import tf_version
if tf_version.is_tf2():
from object_detection.models import center_net_hourglass_feature_extractor
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models import center_net_mobilenet_v2_fpn_feature_extractor
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
@@ -140,8 +141,10 @@ if tf_version.is_tf2():
}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
'resnet_v2_50':
center_net_resnet_feature_extractor.resnet_v2_50,
'resnet_v2_101':
center_net_resnet_feature_extractor.resnet_v2_101,
'resnet_v1_18_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn,
'resnet_v1_34_fpn':
@@ -154,6 +157,8 @@ if tf_version.is_tf2():
center_net_hourglass_feature_extractor.hourglass_104,
'mobilenet_v2':
center_net_mobilenet_v2_feature_extractor.mobilenet_v2,
'mobilenet_v2_fpn':
center_net_mobilenet_v2_fpn_feature_extractor.mobilenet_v2_fpn,
}
FEATURE_EXTRACTOR_MAPS = [
@@ -936,6 +941,21 @@ def tracking_proto_to_params(tracking_config):
task_loss_weight=tracking_config.task_loss_weight)
def temporal_offset_proto_to_params(temporal_offset_config):
"""Converts CenterNet.TemporalOffsetEstimation proto to param-tuple."""
loss = losses_pb2.Loss()
# Add dummy classification loss to avoid the loss_builder throwing error.
# TODO(yuhuic): update the loss builder to take the classification loss
# directly.
loss.classification_loss.weighted_sigmoid.CopyFrom(
losses_pb2.WeightedSigmoidClassificationLoss())
loss.localization_loss.CopyFrom(temporal_offset_config.localization_loss)
_, localization_loss, _, _, _, _, _ = losses_builder.build(loss)
return center_net_meta_arch.TemporalOffsetParams(
localization_loss=localization_loss,
task_loss_weight=temporal_offset_config.task_loss_weight)
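For reference, the fields read above (`localization_loss` and `task_loss_weight`) imply a pipeline config stanza along the following lines. This is a hedged sketch: the surrounding `center_net` block and the smooth-L1 loss choice are assumptions (the test utilities later in this commit pair the task with `WeightedSmoothL1LocalizationLoss`), not text taken from the proto files:
```
center_net {
  ...
  temporal_offset_task {
    task_loss_weight: 1.0
    localization_loss {
      weighted_smooth_l1 {
      }
    }
  }
}
```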
def _build_center_net_model(center_net_config, is_training, add_summaries):
"""Build a CenterNet detection model.
@@ -998,6 +1018,11 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
track_params = tracking_proto_to_params(
center_net_config.track_estimation_task)
temporal_offset_params = None
if center_net_config.HasField('temporal_offset_task'):
temporal_offset_params = temporal_offset_proto_to_params(
center_net_config.temporal_offset_task)
return center_net_meta_arch.CenterNetMetaArch(
is_training=is_training,
add_summaries=add_summaries,
@@ -1009,7 +1034,9 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
keypoint_params_dict=keypoint_params_dict,
mask_params=mask_params,
densepose_params=densepose_params,
track_params=track_params)
track_params=track_params,
temporal_offset_params=temporal_offset_params,
use_depthwise=center_net_config.use_depthwise)
def _build_center_net_feature_extractor(
......
@@ -151,7 +151,10 @@ def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
with tf.name_scope(scope, 'ClipToWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
win_y_min = window[0]
win_x_min = window[1]
win_y_max = window[2]
win_x_max = window[3]
y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
......
@@ -102,7 +102,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities, densepose_*, track_ids}
keypoint_visibilities, densepose_*, track_ids,
temporal_offsets, track_match_flags}
fields.InputDataFields.is_annotated.
Returns:
@@ -304,6 +305,8 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_track_ids_list=None,
groundtruth_temporal_offsets_list=None,
groundtruth_track_match_flags_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
@@ -345,6 +348,12 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
padding.
groundtruth_track_ids_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the track IDs of groundtruth objects.
groundtruth_temporal_offsets_list: a list of 2-D tf.float32 tensors
of shape [num_boxes, 2] containing the spatial offsets of objects'
centers compared with the previous frame.
groundtruth_track_match_flags_list: a list of 1-D tf.float32 tensors
of shape [num_boxes] containing 0-1 flags that indicate if an object
has existed in the previous frame.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
@@ -397,6 +406,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
if groundtruth_track_ids_list:
self._groundtruth_lists[
fields.BoxListFields.track_ids] = groundtruth_track_ids_list
if groundtruth_temporal_offsets_list:
self._groundtruth_lists[
fields.BoxListFields.temporal_offsets] = (
groundtruth_temporal_offsets_list)
if groundtruth_track_match_flags_list:
self._groundtruth_lists[
fields.BoxListFields.track_match_flags] = (
groundtruth_track_match_flags_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
......
@@ -4143,6 +4143,7 @@ def random_scale_crop_and_pad_to_square(
label_weights,
masks=None,
keypoints=None,
label_confidences=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
@@ -4176,6 +4177,8 @@ def random_scale_crop_and_pad_to_square(
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
label_confidences: (optional) float32 tensor of shape [num_instances]
representing the confidence for each box.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
@@ -4191,9 +4194,8 @@ def random_scale_crop_and_pad_to_square(
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
label_confidences: confidences for retained boxes.
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
@@ -4258,6 +4260,9 @@ def random_scale_crop_and_pad_to_square(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
if label_confidences is not None:
return_values.append(tf.gather(label_confidences, indices))
return return_values
@@ -4498,7 +4503,7 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
groundtruth_keypoints, groundtruth_label_confidences),
}
return prep_func_arg_map
......
@@ -3931,6 +3931,32 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
def test_random_scale_crop_and_pad_to_square_handles_confidences(self):
def graph_fn():
image = tf.zeros([10, 10, 1])
boxes = tf.constant([[0, 0, 0.5, 0.5], [0.5, 0.5, 0.75, 0.75]])
label_weights = tf.constant([1.0, 1.0])
box_labels = tf.constant([0, 1])
box_confidences = tf.constant([-1.0, 1.0])
(_, new_boxes, _, _,
new_confidences) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
box_labels,
label_weights,
label_confidences=box_confidences,
scale_min=0.8,
scale_max=0.9,
output_size=10)
return new_boxes, new_confidences
boxes, confidences = self.execute_cpu(graph_fn, [])
self.assertLen(boxes, 2)
self.assertAllEqual(confidences, [-1.0, 1.0])
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -79,6 +80,39 @@ class IouSimilarity(RegionSimilarityCalculator):
return box_list_ops.iou(boxlist1, boxlist2)
class DETRSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity for the Detection Transformer model.
This class computes pairwise DETR similarity between two BoxLists using a
weighted combination of GIOU, classification scores, and the L1 loss.
"""
def __init__(self, l1_weight=5, giou_weight=2):
super().__init__()
self.l1_weight = l1_weight
self.giou_weight = giou_weight
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise DETR similarity between the two BoxLists.
Args:
boxlist1: BoxList holding N groundtruth boxes.
boxlist2: BoxList holding M predicted boxes.
Returns:
A tensor with shape [N, M] representing pairwise DETR similarity scores.
"""
groundtruth_labels = boxlist1.get_field(fields.BoxListFields.classes)
predicted_labels = boxlist2.get_field(fields.BoxListFields.classes)
classification_scores = tf.matmul(groundtruth_labels,
predicted_labels,
transpose_b=True)
loss = self.l1_weight * box_list_ops.l1(
boxlist1, boxlist2) + self.giou_weight * (1 - box_list_ops.giou(
boxlist1, boxlist2)) - classification_scores
return -loss
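In equation form, the similarity returned above between groundtruth box b_i (with one-hot label y_i) and predicted box b̂_j (with class scores p̂_j) is:
```latex
S_{ij} = -\Bigl(\lambda_{\mathrm{L1}}\,\lVert b_i - \hat{b}_j\rVert_1
         + \lambda_{\mathrm{GIoU}}\bigl(1 - \mathrm{GIoU}(b_i, \hat{b}_j)\bigr)
         - y_i^{\top}\hat{p}_j\Bigr),
\qquad \lambda_{\mathrm{L1}} = 5,\ \lambda_{\mathrm{GIoU}} = 2 \text{ by default.}
```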
class NegSqDistSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on the squared distance metric.
......
@@ -93,6 +93,25 @@ class RegionSimilarityCalculatorTest(test_case.TestCase):
iou_output = self.execute(graph_fn, [])
self.assertAllClose(iou_output, exp_output)
def test_detr_similarity(self):
def graph_fn():
corners1 = tf.constant([[5.0, 7.0, 7.0, 9.0]])
corners2 = tf.constant([[5.0, 7.0, 7.0, 9.0], [5.0, 11.0, 7.0, 13.0]])
groundtruth_labels = tf.constant([[1.0, 0.0]])
predicted_labels = tf.constant([[0.0, 1000.0], [1000.0, 0.0]])
boxes1 = box_list.BoxList(corners1)
boxes2 = box_list.BoxList(corners2)
boxes1.add_field(fields.BoxListFields.classes, groundtruth_labels)
boxes2.add_field(fields.BoxListFields.classes, predicted_labels)
detr_similarity_calculator = \
region_similarity_calculator.DETRSimilarity()
detr_similarity = detr_similarity_calculator.compare(
boxes1, boxes2, None)
return detr_similarity
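# Expected values: the first prediction coincides with the groundtruth box
# and has zero classification score, giving similarity 0. For the second
# prediction the boxes are disjoint: the center/size L1 distance is 4
# (centers (6, 8) vs. (6, 12)), weighted by l1_weight=5 -> 20; GIoU is -1/3,
# so the GIoU term is 2 * (1 - (-1/3)) = 8/3; the classification score is
# 1000. The similarity is the negated cost: -20 - 8/3 + 1000.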
exp_output = [[0.0, -20 - 8.0/3.0 + 1000.0]]
sim_output = self.execute(graph_fn, [])
self.assertAllClose(sim_output, exp_output)
if __name__ == '__main__':
tf.test.main()
@@ -47,6 +47,10 @@ class InputDataFields(object):
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_track_ids: box-level track ID labels.
groundtruth_temporal_offset: box-level temporal offsets, i.e.,
movement of the box center in adjacent frames.
groundtruth_track_match_flags: box-level flags indicating if objects
exist in the previous frame.
groundtruth_confidences: box-level class confidences. The shape should be
the same as the shape of groundtruth_classes.
groundtruth_label_types: box-level label types (e.g. explicit negative).
@@ -99,6 +103,8 @@ class InputDataFields(object):
groundtruth_boxes = 'groundtruth_boxes'
groundtruth_classes = 'groundtruth_classes'
groundtruth_track_ids = 'groundtruth_track_ids'
groundtruth_temporal_offset = 'groundtruth_temporal_offset'
groundtruth_track_match_flags = 'groundtruth_track_match_flags'
groundtruth_confidences = 'groundtruth_confidences'
groundtruth_label_types = 'groundtruth_label_types'
groundtruth_is_crowd = 'groundtruth_is_crowd'
@@ -170,6 +176,7 @@ class DetectionResultFields(object):
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
detection_embeddings = 'detection_embeddings'
detection_offsets = 'detection_temporal_offsets'
num_detections = 'num_detections'
raw_detection_boxes = 'raw_detection_boxes'
raw_detection_scores = 'raw_detection_scores'
@@ -194,6 +201,8 @@ class BoxListFields(object):
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
temporal_offsets: temporal center offsets per bounding box.
track_match_flags: match flags per bounding box.
"""
boxes = 'boxes'
classes = 'classes'
@@ -212,6 +221,8 @@ class BoxListFields(object):
is_crowd = 'is_crowd'
group_of = 'group_of'
track_ids = 'track_ids'
temporal_offsets = 'temporal_offsets'
track_match_flags = 'track_match_flags'
class PredictionFields(object):
......
@@ -51,6 +51,7 @@ from object_detection.core import matcher as mat
from object_detection.core import region_similarity_calculator as sim_calc
from object_detection.core import standard_fields as fields
from object_detection.matchers import argmax_matcher
from object_detection.matchers import hungarian_matcher
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
from object_detection.utils import tf_version
@@ -510,7 +511,8 @@ def batch_assign(target_assigner,
anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
(cls_targets, cls_weights,
reg_targets, reg_weights, match) = target_assigner.assign(
anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights)
anchors, gt_boxes, gt_class_targets, unmatched_class_label,
gt_weights)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
@@ -1980,3 +1982,291 @@ class CenterNetCornerOffsetTargetAssigner(object):
return (tf.stack(corner_targets, axis=0),
tf.stack(foreground_targets, axis=0))
class CenterNetTemporalOffsetTargetAssigner(object):
"""Wrapper to compute target tensors for the temporal offset task.
This class has methods that take as input a batch of ground truth tensors
(in the form of a list) and return the targets required to train the
temporal offset task.
"""
def __init__(self, stride):
"""Initializes the target assigner.
Args:
stride: int, the stride of the network in output pixels.
"""
self._stride = stride
def assign_temporal_offset_targets(self,
height,
width,
gt_boxes_list,
gt_offsets_list,
gt_match_list,
gt_weights_list=None):
"""Returns the temporal offset targets and their indices.
For each ground truth box, this function assigns it the corresponding
temporal offset to train the model.
Args:
height: int, height of input to the model. This is used to determine the
height of the output.
width: int, width of the input to the model. This is used to determine the
width of the output.
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_offsets_list: A list of 2-D tf.float32 tensors of shape [num_boxes, 2]
containing the spatial offsets of objects' centers compared with the
previous frame.
gt_match_list: A list of 1-D tf.float32 tensors of shape [num_boxes]
containing flags that indicate if an object has existed in the
previous frame.
gt_weights_list: A list of tensors with shape [num_boxes] corresponding to
the weight of each groundtruth detection box.
Returns:
batch_indices: an integer tensor of shape [num_boxes, 3] holding the
indices inside the predicted tensor which should be penalized. The
first column indicates the index along the batch dimension and the
second and third columns indicate the index along the y and x
dimensions respectively.
batch_temporal_offsets: a float tensor of shape [num_boxes, 2] of the
expected y and x temporal offset of each object center in the
output space.
batch_weights: a float tensor of shape [num_boxes] indicating the
weight of each prediction.
"""
if gt_weights_list is None:
gt_weights_list = [None] * len(gt_boxes_list)
batch_indices = []
batch_weights = []
batch_temporal_offsets = []
for i, (boxes, offsets, match_flags, weights) in enumerate(zip(
gt_boxes_list, gt_offsets_list, gt_match_list, gt_weights_list)):
boxes = box_list.BoxList(boxes)
boxes = box_list_ops.to_absolute_coordinates(boxes,
height // self._stride,
width // self._stride)
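# Normalized box coordinates are converted to absolute coordinates in the
# stride-downsampled output space, so box centers index directly into the
# [height // stride, width // stride] feature map.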
# Get the box center coordinates. Each returned tensor has shape
# [num_boxes].
(y_center, x_center, _, _) = boxes.get_center_coordinates_and_sizes()
num_boxes = tf.shape(x_center)
# Compute the offsets and indices of the box centers. Shape:
# offsets: [num_boxes, 2]
# indices: [num_boxes, 2]
(_, indices) = ta_utils.compute_floor_offsets_with_indices(
y_source=y_center, x_source=x_center)
# Assign ones if weights are not provided.
# If an object is not matched, its weight becomes zero.
if weights is None:
weights = tf.ones(num_boxes, dtype=tf.float32)
weights *= match_flags
# Shape of [num_boxes, 1] integer tensor filled with current batch index.
batch_index = i * tf.ones_like(indices[:, 0:1], dtype=tf.int32)
batch_indices.append(tf.concat([batch_index, indices], axis=1))
batch_weights.append(weights)
batch_temporal_offsets.append(offsets)
batch_indices = tf.concat(batch_indices, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
batch_temporal_offsets = tf.concat(batch_temporal_offsets, axis=0)
return (batch_indices, batch_temporal_offsets, batch_weights)
class DETRTargetAssigner(object):
"""Target assigner for DETR (https://arxiv.org/abs/2005.12872).
Detection Transformer (DETR) matches predicted boxes to groundtruth directly
to determine targets instead of matching anchors to groundtruth. Hence, the
new target assigner.
"""
def __init__(self):
"""Construct Object Detection Target Assigner."""
self._similarity_calc = sim_calc.DETRSimilarity()
self._matcher = hungarian_matcher.HungarianBipartiteMatcher()
def batch_assign(self,
pred_box_batch,
gt_box_batch,
pred_class_batch,
gt_class_targets_batch,
gt_weights_batch=None,
unmatched_class_label_batch=None):
"""Batched assignment of classification and regression targets.
Args:
pred_box_batch: a tensor of shape [batch_size, num_queries, 4]
representing predicted bounding boxes.
gt_box_batch: a tensor of shape [batch_size, num_queries, 4]
representing groundtruth bounding boxes.
pred_class_batch: A list of tensors with length batch_size, where each
tensor has shape [num_queries, num_classes] to be used
by certain similarity calculators.
gt_class_targets_batch: a list of tensors with length batch_size, where
each tensor has shape [num_gt_boxes_i, num_classes] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
unmatched_class_label_batch: a list of float32 tensors with shape
[d_1, d_2, ..., d_k] which is consistent with the classification target
for each prediction (and can be empty for scalar targets). This shape
must thus be compatible with `gt_class_targets_batch`.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_pred_boxes,
num_classes],
batch_cls_weights: a tensor with shape [batch_size, num_pred_boxes,
num_classes],
batch_reg_targets: a tensor with shape [batch_size, num_pred_boxes,
box_code_dimension]
batch_reg_weights: a tensor with shape [batch_size, num_pred_boxes].
"""
pred_box_batch = [
box_list.BoxList(pred_box)
for pred_box in tf.unstack(pred_box_batch)]
gt_box_batch = [
box_list.BoxList(gt_box)
for gt_box in tf.unstack(gt_box_batch)]
cls_targets_list = []
cls_weights_list = []
reg_targets_list = []
reg_weights_list = []
if gt_weights_batch is None:
gt_weights_batch = [None] * len(gt_class_targets_batch)
if unmatched_class_label_batch is None:
unmatched_class_label_batch = [None] * len(gt_class_targets_batch)
pred_class_batch = tf.unstack(pred_class_batch)
for (pred_boxes, gt_boxes, pred_classes, gt_class_targets, gt_weights,
unmatched_class_label) in zip(pred_box_batch, gt_box_batch,
pred_class_batch, gt_class_targets_batch,
gt_weights_batch,
unmatched_class_label_batch):
(cls_targets, cls_weights, reg_targets,
reg_weights) = self.assign(pred_boxes, gt_boxes, pred_classes,
gt_class_targets, gt_weights,
unmatched_class_label)
cls_targets_list.append(cls_targets)
cls_weights_list.append(cls_weights)
reg_targets_list.append(reg_targets)
reg_weights_list.append(reg_weights)
batch_cls_targets = tf.stack(cls_targets_list)
batch_cls_weights = tf.stack(cls_weights_list)
batch_reg_targets = tf.stack(reg_targets_list)
batch_reg_weights = tf.stack(reg_weights_list)
return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights)
def assign(self,
pred_boxes,
gt_boxes,
pred_classes,
gt_labels,
gt_weights=None,
unmatched_class_label=None):
"""Assign classification and regression targets to each box_pred.
For a given set of pred_boxes and groundtruth detections, match pred_boxes
to gt_boxes and assign classification and regression targets to
each box_pred as well as weights based on the resulting match (specifying,
e.g., which pred_boxes should not contribute to training loss).
pred_boxes that are not matched to anything are given a classification
target of `unmatched_cls_target`.
Args:
pred_boxes: a BoxList representing N pred_boxes
gt_boxes: a BoxList representing M groundtruth boxes
pred_classes: A tensor with shape [max_num_boxes, num_classes]
to be used by certain similarity calculators.
gt_labels: a tensor of shape [M, num_classes]
with labels for each of the ground_truth boxes. The subshape
[num_classes] can be empty (corresponding to scalar inputs). When set
to None, gt_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
gt_weights: a float tensor of shape [M] indicating the weight to
assign to all pred_boxes that match a particular groundtruth box. The
weights must be in [0., 1.]. If None, all weights are set to 1.
Generally no groundtruth boxes with zero weight match to any pred_boxes
as matchers are aware of groundtruth weights. Additionally,
`cls_weights` and `reg_weights` are calculated using groundtruth
weights as an added safety.
unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
Returns:
cls_targets: a float32 tensor with shape [num_pred_boxes, num_classes],
where the subshape [num_classes] is compatible with gt_labels
which has shape [num_gt_boxes, num_classes].
cls_weights: a float32 tensor with shape [num_pred_boxes, num_classes],
representing weights for each element in cls_targets.
reg_targets: a float32 tensor with shape [num_pred_boxes,
box_code_dimension]
reg_weights: a float32 tensor with shape [num_pred_boxes]
"""
if not unmatched_class_label:
unmatched_class_label = tf.constant(
[1] + [0] * (gt_labels.shape[1] - 1), tf.float32)
if gt_weights is None:
num_gt_boxes = gt_boxes.num_boxes_static()
if not num_gt_boxes:
num_gt_boxes = gt_boxes.num_boxes()
gt_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
gt_boxes.add_field(fields.BoxListFields.classes, gt_labels)
pred_boxes.add_field(fields.BoxListFields.classes, pred_classes)
match_quality_matrix = self._similarity_calc.compare(
gt_boxes,
pred_boxes)
match = self._matcher.match(match_quality_matrix,
valid_rows=tf.greater(gt_weights, 0))
matched_gt_boxes = match.gather_based_on_match(
gt_boxes.get(),
unmatched_value=tf.zeros(4),
ignored_value=tf.zeros(4))
matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
ty, tx, th, tw = matched_gt_boxlist.get_center_coordinates_and_sizes()
reg_targets = tf.transpose(tf.stack([ty, tx, th, tw]))
cls_targets = match.gather_based_on_match(
gt_labels,
unmatched_value=unmatched_class_label,
ignored_value=unmatched_class_label)
reg_weights = match.gather_based_on_match(
gt_weights,
ignored_value=0.,
unmatched_value=0.)
cls_weights = match.gather_based_on_match(
gt_weights,
ignored_value=0.,
unmatched_value=1)
# convert cls_weights from per-box_pred to per-class.
class_label_shape = tf.shape(cls_targets)[1:]
weights_multiple = tf.concat(
[tf.constant([1]), class_label_shape],
axis=0)
cls_weights = tf.expand_dims(cls_weights, -1)
cls_weights = tf.tile(cls_weights, weights_multiple)
return (cls_targets, cls_weights, reg_targets, reg_weights)
@@ -115,6 +115,7 @@ class TargetAssignerTest(test_case.TestCase):
self.assertEqual(reg_weights_out.dtype, np.float32)
def test_assign_agnostic_with_keypoints(self):
def graph_fn(anchor_means, groundtruth_box_corners,
groundtruth_keypoints):
similarity_calc = region_similarity_calculator.IouSimilarity()
@@ -2290,6 +2291,215 @@ class CornerOffsetTargetAssignerTest(test_case.TestCase):
self.assertAllClose(foreground, np.zeros((1, 5, 5)))
class CenterNetTemporalOffsetTargetAssignerTest(test_case.TestCase):
def setUp(self):
super(CenterNetTemporalOffsetTargetAssignerTest, self).setUp()
self._box_center = [0.0, 0.0, 1.0, 1.0]
self._box_center_small = [0.25, 0.25, 0.75, 0.75]
self._box_lower_left = [0.5, 0.0, 1.0, 0.5]
self._box_center_offset = [0.1, 0.05, 1.0, 1.0]
self._box_odd_coordinates = [0.1625, 0.2125, 0.5625, 0.9625]
self._offset_center = [0.5, 0.4]
self._offset_center_small = [0.1, 0.1]
self._offset_lower_left = [-0.1, 0.1]
self._offset_center_offset = [0.4, 0.3]
self._offset_odd_coord = [0.125, -0.125]
def test_assign_empty_groundtruths(self):
"""Tests the assign_offset_targets function with empty inputs."""
def graph_fn():
box_batch = [
tf.zeros((0, 4), dtype=tf.float32),
]
offset_batch = [
tf.zeros((0, 2), dtype=tf.float32),
]
match_flag_batch = [
tf.zeros((0), dtype=tf.float32),
]
assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4)
indices, temporal_offset, weights = assigner.assign_temporal_offset_targets(
80, 80, box_batch, offset_batch, match_flag_batch)
return indices, temporal_offset, weights
indices, temporal_offset, weights = self.execute(graph_fn, [])
self.assertEqual(indices.shape, (0, 3))
self.assertEqual(temporal_offset.shape, (0, 2))
self.assertEqual(weights.shape, (0,))
def test_assign_offset_targets(self):
"""Tests the assign_offset_targets function."""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center_offset]),
tf.constant([self._box_center_small, self._box_odd_coordinates]),
]
offset_batch = [
tf.constant([self._offset_center, self._offset_lower_left]),
tf.constant([self._offset_center_offset]),
tf.constant([self._offset_center_small, self._offset_odd_coord]),
]
match_flag_batch = [
tf.constant([1.0, 1.0]),
tf.constant([1.0]),
tf.constant([1.0, 1.0]),
]
assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4)
indices, temporal_offset, weights = assigner.assign_temporal_offset_targets(
80, 80, box_batch, offset_batch, match_flag_batch)
return indices, temporal_offset, weights
indices, temporal_offset, weights = self.execute(graph_fn, [])
self.assertEqual(indices.shape, (5, 3))
self.assertEqual(temporal_offset.shape, (5, 2))
self.assertEqual(weights.shape, (5,))
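# The expected indices are the box centers in the 20x20 (stride-4) output
# space, e.g. _box_center has normalized center (0.5, 0.5) -> (10, 10) and
# _box_odd_coordinates has center (0.3625, 0.5875) -> floored to (7, 11).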
np.testing.assert_array_equal(
indices,
[[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]])
np.testing.assert_array_almost_equal(
temporal_offset,
[[0.5, 0.4], [-0.1, 0.1], [0.4, 0.3], [0.1, 0.1], [0.125, -0.125]])
np.testing.assert_array_equal(weights, 1)
def test_assign_offset_targets_with_match_flags(self):
"""Tests the assign_offset_targets function with match flags."""
def graph_fn():
box_batch = [
tf.constant([self._box_center, self._box_lower_left]),
tf.constant([self._box_center_offset]),
tf.constant([self._box_center_small, self._box_odd_coordinates]),
]
offset_batch = [
tf.constant([self._offset_center, self._offset_lower_left]),
tf.constant([self._offset_center_offset]),
tf.constant([self._offset_center_small, self._offset_odd_coord]),
]
match_flag_batch = [
tf.constant([0.0, 1.0]),
tf.constant([1.0]),
tf.constant([1.0, 1.0]),
]
cn_assigner = targetassigner.CenterNetTemporalOffsetTargetAssigner(4)
weights_batch = [
tf.constant([1.0, 0.0]),
tf.constant([1.0]),
tf.constant([1.0, 1.0])
]
indices, temporal_offset, weights = cn_assigner.assign_temporal_offset_targets(
80, 80, box_batch, offset_batch, match_flag_batch, weights_batch)
return indices, temporal_offset, weights
indices, temporal_offset, weights = self.execute(graph_fn, [])
self.assertEqual(indices.shape, (5, 3))
self.assertEqual(temporal_offset.shape, (5, 2))
self.assertEqual(weights.shape, (5,))
np.testing.assert_array_equal(
indices,
[[0, 10, 10], [0, 15, 5], [1, 11, 10], [2, 10, 10], [2, 7, 11]])
np.testing.assert_array_almost_equal(
temporal_offset,
[[0.5, 0.4], [-0.1, 0.1], [0.4, 0.3], [0.1, 0.1], [0.125, -0.125]])
np.testing.assert_array_equal(weights, [0, 0, 1, 1, 1])
class DETRTargetAssignerTest(test_case.TestCase):
def test_assign_detr(self):
def graph_fn(pred_corners, groundtruth_box_corners,
groundtruth_labels, predicted_labels):
detr_target_assigner = targetassigner.DETRTargetAssigner()
pred_boxlist = box_list.BoxList(pred_corners)
groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
result = detr_target_assigner.assign(
pred_boxlist, groundtruth_boxlist,
predicted_labels, groundtruth_labels)
(cls_targets, cls_weights, reg_targets, reg_weights) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
pred_corners = np.array([[0.25, 0.25, 0.4, 0.2],
[0.5, 0.8, 1.0, 0.8],
[0.9, 0.5, 0.1, 1.0]], dtype=np.float32)
groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]],
dtype=np.float32)
predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]],
dtype=np.float32)
groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]],
dtype=np.float32)
exp_cls_targets = [[0, 1], [0, 1], [1, 0]]
exp_cls_weights = [[1, 1], [1, 1], [1, 1]]
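# Regression targets are the matched groundtruth boxes in center/size form
# (ty, tx, th, tw); the unmatched third prediction receives all-zero targets
# and zero regression weight.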
exp_reg_targets = [[0.25, 0.25, 0.5, 0.5],
[0.7, 0.7, 0.4, 0.4],
[0, 0, 0, 0]]
exp_reg_weights = [1, 1, 0]
(cls_targets_out,
cls_weights_out, reg_targets_out, reg_weights_out) = self.execute_cpu(
graph_fn, [pred_corners, groundtruth_box_corners,
groundtruth_labels, predicted_labels])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEqual(cls_targets_out.dtype, np.float32)
self.assertEqual(cls_weights_out.dtype, np.float32)
self.assertEqual(reg_targets_out.dtype, np.float32)
self.assertEqual(reg_weights_out.dtype, np.float32)
def test_batch_assign_detr(self):
def graph_fn(pred_corners, groundtruth_box_corners,
groundtruth_labels, predicted_labels):
detr_target_assigner = targetassigner.DETRTargetAssigner()
result = detr_target_assigner.batch_assign(
pred_corners, groundtruth_box_corners,
[predicted_labels], [groundtruth_labels])
(cls_targets, cls_weights, reg_targets, reg_weights) = result
return (cls_targets, cls_weights, reg_targets, reg_weights)
pred_corners = np.array([[[0.25, 0.25, 0.4, 0.2],
[0.5, 0.8, 1.0, 0.8],
[0.9, 0.5, 0.1, 1.0]]], dtype=np.float32)
groundtruth_box_corners = np.array([[[0.0, 0.0, 0.5, 0.5],
[0.5, 0.5, 0.9, 0.9]]],
dtype=np.float32)
predicted_labels = np.array([[-3.0, 3.0], [2.0, 9.4], [5.0, 1.0]],
dtype=np.float32)
groundtruth_labels = np.array([[0.0, 1.0], [0.0, 1.0]],
dtype=np.float32)
exp_cls_targets = [[[0, 1], [0, 1], [1, 0]]]
exp_cls_weights = [[[1, 1], [1, 1], [1, 1]]]
exp_reg_targets = [[[0.25, 0.25, 0.5, 0.5],
[0.7, 0.7, 0.4, 0.4],
[0, 0, 0, 0]]]
exp_reg_weights = [[1, 1, 0]]
(cls_targets_out,
cls_weights_out, reg_targets_out, reg_weights_out) = self.execute_cpu(
graph_fn, [pred_corners, groundtruth_box_corners,
groundtruth_labels, predicted_labels])
self.assertAllClose(cls_targets_out, exp_cls_targets)
self.assertAllClose(cls_weights_out, exp_cls_weights)
self.assertAllClose(reg_targets_out, exp_reg_targets)
self.assertAllClose(reg_weights_out, exp_reg_weights)
self.assertEqual(cls_targets_out.dtype, np.float32)
self.assertEqual(cls_weights_out.dtype, np.float32)
self.assertEqual(reg_targets_out.dtype, np.float32)
self.assertEqual(reg_weights_out.dtype, np.float32)
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
# Running TF2 Detection API Models on mobile
[![TensorFlow 2.2](https://img.shields.io/badge/TensorFlow-2.2-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.2.0)
[![TensorFlow 2.3](https://img.shields.io/badge/TensorFlow-2.3-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v2.3.0)
[![Python 3.6](https://img.shields.io/badge/Python-3.6-3776AB)](https://www.python.org/downloads/release/python-360/)
**NOTE:** This support was added *after* TF2.3, so please use the latest
nightly build of the TensorFlow Lite Converter for this to work.
[TensorFlow Lite](https://www.tensorflow.org/mobile/tflite/) (TFLite) is
TensorFlow’s lightweight solution for mobile and embedded devices. It enables
on-device machine learning inference with low latency and a small binary size.
@@ -54,16 +57,30 @@ python object_detection/export_tflite_graph_tf2.py \
--output_directory path/to/exported_model_directory
```
Use `--help` with the aboev script to get the full list of supported parameters.
Use `--help` with the above script to get the full list of supported parameters.
These can fine-tune accuracy and speed for your model.
### Step 2: Convert to TFLite
Use the [TensorFlow Lite Converter](https://www.tensorflow.org/lite/convert) to
convert the `SavedModel` to TFLite. You can also leverage
convert the `SavedModel` to TFLite. Note that you need to use `from_saved_model`
for TFLite conversion with the Python API.
You can also leverage
[Post-training Quantization](https://www.tensorflow.org/lite/performance/post_training_quantization)
to
[optimize performance](https://www.tensorflow.org/lite/performance/model_optimization)
and obtain a smaller model.
and obtain a smaller model. Note that this is only possible from the *Python
API*. Be sure to use a
[representative dataset](https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization)
and set the following options on the converter:
```
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
tf.lite.OpsSet.TFLITE_BUILTINS]
converter.representative_dataset = <...>
```
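Putting both steps together, here is a minimal sketch of the conversion flow; the paths and the input shape are illustrative placeholders, not values required by the exporter:
```python
import tensorflow as tf

# Step 2 sketch: convert the SavedModel exported in Step 1.
converter = tf.lite.TFLiteConverter.from_saved_model(
    'path/to/exported_model_directory/saved_model')

def representative_dataset():
  # Hypothetical calibration inputs; use real preprocessed images matching
  # your model's input signature.
  for _ in range(100):
    yield [tf.random.uniform([1, 320, 320, 3])]

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8,
                                       tf.lite.OpsSet.TFLITE_BUILTINS]
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()
with open('model.tflite', 'wb') as f:
  f.write(tflite_model)
```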
## Running our model on Android
......
@@ -15,7 +15,7 @@ They are also useful for initializing your models when training on novel
datasets. You can try this out on our few-shot training
[colab](../colab_tutorials/eager_few_shot_od_training_tf2_colab.ipynb).
Please look at [this guide](../running_on_mobile_tf2.md) for mobile inference.
Please look at [this guide](running_on_mobile_tf2.md) for mobile inference.
<!-- mdlint on -->
......
@@ -139,7 +139,7 @@ class CenterNetFeatureExtractor(tf.keras.Model):
def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
bias_fill=None):
bias_fill=None, use_depthwise=False, name=None):
"""Creates a network to predict the given number of output channels.
This function is intended to make the prediction heads for the CenterNet
@@ -151,12 +151,19 @@ def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
num_filters: The number of filters in the intermediate conv layer.
bias_fill: If not None, is used to initialize the bias in the final conv
layer.
use_depthwise: If true, use SeparableConv2D to construct the Sequential
layers instead of Conv2D.
name: Optional name for the prediction net.
Returns:
net: A keras module which when called on an input tensor of size
[batch_size, height, width, num_in_channels] returns an output
of size [batch_size, height, width, num_out_channels]
"""
if use_depthwise:
conv_fn = tf.keras.layers.SeparableConv2D
else:
conv_fn = tf.keras.layers.Conv2D
out_conv = tf.keras.layers.Conv2D(num_out_channels, kernel_size=1)
@@ -164,11 +171,10 @@ def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)
net = tf.keras.Sequential(
[tf.keras.layers.Conv2D(num_filters, kernel_size=kernel_size,
padding='same'),
[conv_fn(num_filters, kernel_size=kernel_size, padding='same'),
tf.keras.layers.ReLU(),
out_conv]
)
out_conv],
name=name)
return net
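A minimal usage sketch for the head constructor above, with shapes as in the docstring; the 64-channel input is arbitrary:
```python
# Hypothetical: a 2-channel offset head built from separable convolutions.
head = make_prediction_net(num_out_channels=2, use_depthwise=True,
                           name='offset_head')
out = head(tf.zeros([4, 128, 128, 64]))  # -> shape (4, 128, 128, 2)
```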
@@ -329,6 +335,39 @@ def prediction_tensors_to_boxes(detection_scores, y_indices, x_indices,
return boxes, detection_classes, detection_scores, num_detections
def prediction_tensors_to_temporal_offsets(
y_indices, x_indices, offset_predictions):
"""Converts CenterNet temporal offset map predictions to batched format.
This function is similar to the box offset conversion function, as both
temporal offsets and box offsets are size-2 vectors.
Args:
y_indices: A [batch, num_boxes] int32 tensor with y indices corresponding to
object center locations (expressed in output coordinate frame).
x_indices: A [batch, num_boxes] int32 tensor with x indices corresponding to
object center locations (expressed in output coordinate frame).
offset_predictions: A float tensor of shape [batch_size, height, width, 2]
representing the y and x offsets of a box's center across adjacent frames.
Returns:
offsets: A tensor of shape [batch_size, num_boxes, 2] holding the
object temporal offsets in the (y, x) dimensions.
"""
_, _, width, _ = _get_shape(offset_predictions, 4)
peak_spatial_indices = flattened_indices_from_row_col_indices(
y_indices, x_indices, width)
y_indices = _to_float32(y_indices)
x_indices = _to_float32(x_indices)
offsets_flat = _flatten_spatial_dimensions(offset_predictions)
offsets = tf.gather(offsets_flat, peak_spatial_indices, batch_dims=1)
return offsets
def prediction_tensors_to_keypoint_candidates(
keypoint_heatmap_predictions,
keypoint_heatmap_offsets,
@@ -555,15 +594,23 @@ def refine_keypoints(regressed_keypoints, keypoint_candidates, keypoint_scores,
# Pairwise squared distances between regressed keypoints and candidate
# keypoints (for a single keypoint type).
# Shape [batch_size, num_instances, max_candidates, num_keypoints].
# Shape [batch_size, num_instances, 1, num_keypoints, 2].
regressed_keypoint_expanded = tf.expand_dims(regressed_keypoints,
axis=2)
# Shape [batch_size, 1, max_candidates, num_keypoints, 2].
keypoint_candidates_expanded = tf.expand_dims(
keypoint_candidates_with_nans, axis=1)
sqrd_distances = tf.math.reduce_sum(
tf.math.squared_difference(regressed_keypoint_expanded,
keypoint_candidates_expanded),
axis=-1)
# Use explicit tensor shape broadcasting (since the tensor dimensions are
# expanded to 5D) to make it tf.lite compatible.
regressed_keypoint_expanded = tf.tile(
regressed_keypoint_expanded, multiples=[1, 1, max_candidates, 1, 1])
keypoint_candidates_expanded = tf.tile(
keypoint_candidates_expanded, multiples=[1, num_instances, 1, 1, 1])
# Replace tf.math.squared_difference with the "-" operator and tf.multiply
# ops, since the tf.lite converter doesn't support squared_difference with an
# undetermined dimension.
diff = regressed_keypoint_expanded - keypoint_candidates_expanded
sqrd_distances = tf.math.reduce_sum(tf.multiply(diff, diff), axis=-1)
distances = tf.math.sqrt(sqrd_distances)
# Determine the candidates that have the minimum distance to the regressed
@@ -935,9 +982,16 @@ def convert_strided_predictions_to_normalized_keypoints(
def clip_to_window(inputs):
keypoints, window = inputs
return keypoint_ops.clip_to_window(keypoints, window)
# Specify the TensorSpec explicitly in the tf.map_fn to make it tf.lite
# compatible.
kpts_dims = _get_shape(keypoint_coords_normalized, 4)
output_spec = tf.TensorSpec(
shape=[kpts_dims[1], kpts_dims[2], kpts_dims[3]], dtype=tf.float32)
keypoint_coords_normalized = tf.map_fn(
clip_to_window, (keypoint_coords_normalized, batch_window),
dtype=tf.float32, back_prop=False)
dtype=tf.float32, back_prop=False,
fn_output_signature=output_spec)
keypoint_scores = tf.where(valid_indices, keypoint_scores,
tf.zeros_like(keypoint_scores))
return keypoint_coords_normalized, keypoint_scores
@@ -1534,6 +1588,32 @@ class TrackParams(
num_fc_layers, classification_loss,
task_loss_weight)
class TemporalOffsetParams(
collections.namedtuple('TemporalOffsetParams', [
'localization_loss', 'task_loss_weight'
])):
"""Namedtuple to store temporal offset related parameters."""
__slots__ = ()
def __new__(cls,
localization_loss,
task_loss_weight=1.0):
"""Constructor with default values for TrackParams.
Args:
localization_loss: an object_detection.core.losses.Loss object to
compute the loss for the temporal offset in CenterNet.
task_loss_weight: float, the loss weight for the temporal offset
task.
Returns:
An initialized TemporalOffsetParams namedtuple.
"""
return super(TemporalOffsetParams,
cls).__new__(cls, localization_loss, task_loss_weight)
# The following constants are used to generate the keys of the
# (prediction, loss, target assigner,...) dictionaries used in CenterNetMetaArch
# class.
@@ -1552,6 +1632,8 @@ DENSEPOSE_REGRESSION = 'densepose/regression'
LOSS_KEY_PREFIX = 'Loss'
TRACK_TASK = 'track_task'
TRACK_REID = 'track/reid'
TEMPORALOFFSET_TASK = 'temporal_offset_task'
TEMPORAL_OFFSET = 'track/offset'
def get_keypoint_name(task_name, head_name):
@@ -1596,7 +1678,9 @@ class CenterNetMetaArch(model.DetectionModel):
keypoint_params_dict=None,
mask_params=None,
densepose_params=None,
track_params=None):
track_params=None,
temporal_offset_params=None,
use_depthwise=False):
"""Initializes a CenterNet model.
Args:
@@ -1631,6 +1715,10 @@ class CenterNetMetaArch(model.DetectionModel):
track_params: A TrackParams namedtuple. This object
holds the hyper-parameters for tracking. Please see the class
definition for more details.
temporal_offset_params: A TemporalOffsetParams namedtuple. This object
holds the hyper-parameters for offset prediction based tracking.
use_depthwise: If true, all task heads will be constructed using
separable_conv. Otherwise, standard convolutions will be used.
"""
assert object_detection_params or keypoint_params_dict
# Shorten the name for convenience and better formatting.
@@ -1651,6 +1739,9 @@ class CenterNetMetaArch(model.DetectionModel):
'be supplied.')
self._densepose_params = densepose_params
self._track_params = track_params
self._temporal_offset_params = temporal_offset_params
self._use_depthwise = use_depthwise
# Construct the prediction head nets.
self._prediction_head_dict = self._construct_prediction_heads(
@@ -1695,58 +1786,75 @@ class CenterNetMetaArch(model.DetectionModel):
"""
prediction_heads = {}
prediction_heads[OBJECT_CENTER] = [
make_prediction_net(num_classes, bias_fill=class_prediction_bias_init)
make_prediction_net(num_classes, bias_fill=class_prediction_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
if self._od_params is not None:
prediction_heads[BOX_SCALE] = [
make_prediction_net(NUM_SIZE_CHANNELS)
make_prediction_net(
NUM_SIZE_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[BOX_OFFSET] = [
make_prediction_net(NUM_OFFSET_CHANNELS)
make_prediction_net(
NUM_OFFSET_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
if self._kp_params_dict is not None:
for task_name, kp_params in self._kp_params_dict.items():
num_keypoints = len(kp_params.keypoint_indices)
# pylint: disable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_HEATMAP)] = [
make_prediction_net(
num_keypoints, bias_fill=kp_params.heatmap_bias_init)
num_keypoints,
bias_fill=kp_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_REGRESSION)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints)
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
if kp_params.per_keypoint_offset:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints)
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
else:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS)
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: disable=g-complex-comprehension
if self._mask_params is not None:
prediction_heads[SEGMENTATION_HEATMAP] = [
make_prediction_net(num_classes,
bias_fill=self._mask_params.heatmap_bias_init)
make_prediction_net(
num_classes,
bias_fill=self._mask_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
if self._densepose_params is not None:
prediction_heads[DENSEPOSE_HEATMAP] = [
make_prediction_net( # pylint: disable=g-complex-comprehension
make_prediction_net(
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init)
bias_fill=self._densepose_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[DENSEPOSE_REGRESSION] = [
make_prediction_net(2 * self._densepose_params.num_parts)
make_prediction_net(2 * self._densepose_params.num_parts,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
if self._track_params is not None:
prediction_heads[TRACK_REID] = [
make_prediction_net(self._track_params.reid_embed_size)
make_prediction_net(self._track_params.reid_embed_size,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
# Creates a classification network to train object embeddings by learning
@@ -1764,6 +1872,12 @@ class CenterNetMetaArch(model.DetectionModel):
tf.keras.layers.Dense(self._track_params.num_track_ids,
input_shape=(
self._track_params.reid_embed_size,)))
if self._temporal_offset_params is not None:
prediction_heads[TEMPORAL_OFFSET] = [
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
return prediction_heads
def _initialize_target_assigners(self, stride, min_box_overlap_iou):
@@ -1806,6 +1920,9 @@ class CenterNetMetaArch(model.DetectionModel):
target_assigners[TRACK_TASK] = (
cn_assigner.CenterNetTrackTargetAssigner(
stride, self._track_params.num_track_ids))
if self._temporal_offset_params is not None:
target_assigners[TEMPORALOFFSET_TASK] = (
cn_assigner.CenterNetTemporalOffsetTargetAssigner(stride))
return target_assigners
@@ -2394,6 +2511,54 @@ class CenterNetMetaArch(model.DetectionModel):
return loss_per_instance
def _compute_temporal_offset_loss(self, input_height,
input_width, prediction_dict):
"""Computes the temporal offset loss for tracking.
Args:
input_height: An integer scalar tensor representing input image height.
input_width: An integer scalar tensor representing input image width.
prediction_dict: The dictionary returned from the predict() method.
Returns:
A dictionary with track/temporal_offset losses.
"""
gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes)
gt_offsets_list = self.groundtruth_lists(
fields.BoxListFields.temporal_offsets)
gt_match_list = self.groundtruth_lists(
fields.BoxListFields.track_match_flags)
gt_weights_list = self.groundtruth_lists(fields.BoxListFields.weights)
num_boxes = tf.cast(
get_num_instances_from_weights(gt_weights_list), tf.float32)
offset_predictions = prediction_dict[TEMPORAL_OFFSET]
num_predictions = float(len(offset_predictions))
assigner = self._target_assigner_dict[TEMPORALOFFSET_TASK]
(batch_indices, batch_offset_targets,
batch_weights) = assigner.assign_temporal_offset_targets(
height=input_height,
width=input_width,
gt_boxes_list=gt_boxes_list,
gt_offsets_list=gt_offsets_list,
gt_match_list=gt_match_list,
gt_weights_list=gt_weights_list)
batch_weights = tf.expand_dims(batch_weights, -1)
offset_loss_fn = self._temporal_offset_params.localization_loss
loss_dict = {}
offset_loss = 0
for offset_pred in offset_predictions:
offset_pred = cn_assigner.get_batch_predictions_from_indices(
offset_pred, batch_indices)
offset_loss += offset_loss_fn(offset_pred[:, None],
batch_offset_targets[:, None],
weights=batch_weights)
offset_loss = tf.reduce_sum(offset_loss) / (num_predictions * num_boxes)
loss_dict[TEMPORAL_OFFSET] = offset_loss
return loss_dict
def preprocess(self, inputs):
outputs = shape_utils.resize_images_and_return_shapes(
inputs, self._image_resizer_fn)
@@ -2490,6 +2655,7 @@ class CenterNetMetaArch(model.DetectionModel):
'Loss/densepose/heatmap', (optional)
'Loss/densepose/regression', (optional)
'Loss/track/reid'] (optional)
'Loss/track/offset'] (optional)
scalar tensors corresponding to the losses for different tasks. Note the
$TASK_NAME is provided by the KeypointEstimation namedtuple used to
differentiate between different keypoint tasks.
@@ -2567,6 +2733,16 @@ class CenterNetMetaArch(model.DetectionModel):
track_losses[key] * self._track_params.task_loss_weight)
losses.update(track_losses)
if self._temporal_offset_params is not None:
offset_losses = self._compute_temporal_offset_loss(
input_height=input_height,
input_width=input_width,
prediction_dict=prediction_dict)
for key in offset_losses:
offset_losses[key] = (
offset_losses[key] * self._temporal_offset_params.task_loss_weight)
losses.update(offset_losses)
# Prepend the LOSS_KEY_PREFIX to the keys in the dictionary such that the
# losses will be grouped together in Tensorboard.
return dict([('%s/%s' % (LOSS_KEY_PREFIX, key), val)
@@ -2683,6 +2859,12 @@ class CenterNetMetaArch(model.DetectionModel):
fields.DetectionResultFields.detection_embeddings: embeddings
})
if self._temporal_offset_params:
offsets = prediction_tensors_to_temporal_offsets(
y_indices, x_indices,
prediction_dict[TEMPORAL_OFFSET][-1])
postprocess_dict[fields.DetectionResultFields.detection_offsets] = offsets
return postprocess_dict
def _postprocess_embeddings(self, prediction_dict, y_indices, x_indices):
@@ -2753,6 +2935,7 @@ class CenterNetMetaArch(model.DetectionModel):
get_keypoint_name(task_name, KEYPOINT_REGRESSION)][-1]
instance_inds = self._get_instance_indices(
classes, num_detections, ex_ind, kp_params.class_id)
num_ind = _get_shape(instance_inds, 1)
def true_fn(
keypoint_heatmap, keypoint_offsets, keypoint_regression,
@@ -2787,7 +2970,8 @@ class CenterNetMetaArch(model.DetectionModel):
true_fn, keypoint_heatmap, keypoint_offsets, keypoint_regression,
classes, y_indices, x_indices, boxes, instance_inds, ex_ind,
kp_params)
results = tf.cond(tf.size(instance_inds) > 0, true_fn, false_fn)
# Use dimension values instead of tf.size for tf.lite compatibility.
results = tf.cond(num_ind[0] > 0, true_fn, false_fn)
kpt_coords_for_class_list.append(results[0])
kpt_scores_for_class_list.append(results[1])
@@ -2799,7 +2983,9 @@ class CenterNetMetaArch(model.DetectionModel):
instance_inds_for_example = tf.concat(instance_inds_for_class_list,
axis=0)
if tf.size(instance_inds_for_example) > 0:
# Use dimension values instead of tf.size for tf.lite compatibility.
num_inds = _get_shape(instance_inds_for_example, 1)
if num_inds[0] > 0:
# Scatter into tensor where instances align with original detection
# instances. New shape of keypoint coordinates and scores are
# [1, max_detections, num_total_keypoints, 2] and
@@ -2839,7 +3025,7 @@ class CenterNetMetaArch(model.DetectionModel):
class_id: Class id
Returns:
instance_inds: A [num_instances] int tensor where each element indicates
instance_inds: A [num_instances] int32 tensor where each element indicates
the instance location within the `classes` tensor. This is useful to
associate the refined keypoints with the original detections (i.e.
boxes)
@@ -2848,11 +3034,14 @@ class CenterNetMetaArch(model.DetectionModel):
_, max_detections = shape_utils.combined_static_and_dynamic_shape(
classes)
# Get the detection indices corresponding to the target class.
# Call tf.math.equal with matched tensor shape to make it tf.lite
# compatible.
valid_detections_with_kpt_class = tf.math.logical_and(
tf.range(max_detections) < num_detections[batch_index],
classes[0] == class_id)
tf.math.equal(classes[0], tf.fill(classes[0].shape, class_id)))
instance_inds = tf.where(valid_detections_with_kpt_class)[:, 0]
return instance_inds
# Cast the indices tensor to int32 for tf.lite compatibility.
return tf.cast(instance_inds, tf.int32)
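# --- Editor's sketch (not part of the commit): the matched-shape comparison
# and int32 cast above, in isolation with toy values. Comparing against a
# broadcast python scalar is valid TF, but materializing a same-shape tensor
# with tf.fill keeps the op TFLite-convertible:
import tensorflow as tf

classes = tf.constant([1, 3, 1, 2])    # per-detection class ids
class_id = 1
mask = tf.math.equal(classes, tf.fill(classes.shape, class_id))
instance_inds = tf.where(mask)[:, 0]             # int64 by default
instance_inds = tf.cast(instance_inds, tf.int32) # -> [0, 2]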
def _postprocess_keypoints_for_class_and_image(
self, keypoint_heatmap, keypoint_offsets, keypoint_regression, classes,
......
......@@ -35,11 +35,14 @@ from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetMetaArchPredictionHeadTest(test_case.TestCase):
class CenterNetMetaArchPredictionHeadTest(
test_case.TestCase, parameterized.TestCase):
"""Test CenterNet meta architecture prediction head."""
def test_prediction_head(self):
head = cnma.make_prediction_net(num_out_channels=7)
@parameterized.parameters([True, False])
def test_prediction_head(self, use_depthwise):
head = cnma.make_prediction_net(num_out_channels=7,
use_depthwise=use_depthwise)
output = head(np.zeros((4, 128, 128, 8)))
self.assertEqual((4, 128, 128, 7), output.shape)
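# --- Editor's sketch (not part of the commit): one way the use_depthwise
# flag could swap the convolution type inside make_prediction_net. This is an
# assumption for illustration only, not the library's actual implementation:
import tensorflow as tf

def make_prediction_net_sketch(num_out_channels, use_depthwise=False):
  conv = (tf.keras.layers.SeparableConv2D if use_depthwise
          else tf.keras.layers.Conv2D)
  return tf.keras.Sequential([
      conv(256, kernel_size=3, padding='same', activation='relu'),
      tf.keras.layers.Conv2D(num_out_channels, kernel_size=1),
  ])

# Either variant maps a [4, 128, 128, 8] feature map to [4, 128, 128, 7],
# matching the shape assertion in the test above.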
......@@ -547,6 +550,53 @@ class CenterNetMetaArchHelpersTest(test_case.TestCase, parameterized.TestCase):
np.testing.assert_allclose(scores[1][:1], [.9])
np.testing.assert_allclose(scores[2], [1., .8])
def test_offset_prediction(self):
class_pred = np.zeros((3, 128, 128, 5), dtype=np.float32)
offset_pred = np.zeros((3, 128, 128, 2), dtype=np.float32)
# Sample 1, 2 boxes
class_pred[0, 10, 20] = [0.3, .7, 0.0, 0.0, 0.0]
offset_pred[0, 10, 20] = [1, 2]
class_pred[0, 50, 60] = [0.55, 0.0, 0.0, 0.0, 0.45]
offset_pred[0, 50, 60] = [0, 0]
# Sample 2, 2 boxes (at same location)
class_pred[1, 100, 100] = [0.0, 0.1, 0.9, 0.0, 0.0]
offset_pred[1, 100, 100] = [1, 3]
# Sample 3, 3 boxes
class_pred[2, 60, 90] = [0.0, 0.0, 0.0, 0.2, 0.8]
offset_pred[2, 60, 90] = [0, 0]
class_pred[2, 65, 95] = [0.0, 0.7, 0.3, 0.0, 0.0]
offset_pred[2, 65, 95] = [1, 2]
class_pred[2, 75, 85] = [1.0, 0.0, 0.0, 0.0, 0.0]
offset_pred[2, 75, 85] = [5, 2]
def graph_fn():
class_pred_tensor = tf.constant(class_pred)
offset_pred_tensor = tf.constant(offset_pred)
_, y_indices, x_indices, _ = (
cnma.top_k_feature_map_locations(
class_pred_tensor, max_pool_kernel_size=3, k=2))
offsets = cnma.prediction_tensors_to_temporal_offsets(
y_indices, x_indices, offset_pred_tensor)
return offsets
offsets = self.execute(graph_fn, [])
np.testing.assert_allclose(
[[1, 2], [0, 0]], offsets[0])
np.testing.assert_allclose(
[[1, 3], [1, 3]], offsets[1])
np.testing.assert_allclose(
[[5, 2], [0, 0]], offsets[2])
def test_keypoint_candidate_prediction(self):
keypoint_heatmap_np = np.zeros((2, 3, 3, 2), dtype=np.float32)
keypoint_heatmap_np[0, 0, 0, 0] = 1.0
......@@ -1156,6 +1206,13 @@ def get_fake_track_params():
task_loss_weight=1.0)
def get_fake_temporal_offset_params():
"""Returns the fake temporal offset parameter namedtuple."""
return cnma.TemporalOffsetParams(
localization_loss=losses.WeightedSmoothL1LocalizationLoss(),
task_loss_weight=1.0)
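# --- Editor's sketch (not part of the commit): the
# WeightedSmoothL1LocalizationLoss used above is a Huber loss; assuming the
# default delta of 1.0, the per-element penalty is quadratic for small offset
# errors and linear for large ones:
def smooth_l1(x, delta=1.0):
  ax = abs(x)
  return 0.5 * ax * ax if ax <= delta else delta * (ax - 0.5 * delta)

assert smooth_l1(0.5) == 0.125  # quadratic region
assert smooth_l1(2.0) == 1.5    # linear region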
def build_center_net_meta_arch(build_resnet=False):
"""Builds the CenterNet meta architecture."""
if build_resnet:
......@@ -1185,7 +1242,8 @@ def build_center_net_meta_arch(build_resnet=False):
keypoint_params_dict={_TASK_NAME: get_fake_kp_params()},
mask_params=get_fake_mask_params(),
densepose_params=get_fake_densepose_params(),
track_params=get_fake_track_params())
track_params=get_fake_track_params(),
temporal_offset_params=get_fake_temporal_offset_params())
def _logit(p):
......@@ -1284,6 +1342,11 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
fake_feature_map)
self.assertEqual((4, 128, 128, _REID_EMBED_SIZE), output.shape)
# "temporal offset" head:
output = model._prediction_head_dict[cnma.TEMPORAL_OFFSET][-1](
fake_feature_map)
self.assertEqual((4, 128, 128, 2), output.shape)
def test_initialize_target_assigners(self):
model = build_center_net_meta_arch()
assigner_dict = model._initialize_target_assigners(
......@@ -1315,6 +1378,10 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertIsInstance(assigner_dict[cnma.TRACK_TASK],
cn_assigner.CenterNetTrackTargetAssigner)
# Temporal Offset target assigner:
self.assertIsInstance(assigner_dict[cnma.TEMPORALOFFSET_TASK],
cn_assigner.CenterNetTemporalOffsetTargetAssigner)
def test_predict(self):
"""Test the predict function."""
......@@ -1341,6 +1408,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
(2, 32, 32, 2 * _DENSEPOSE_NUM_PARTS))
self.assertEqual(prediction_dict[cnma.TRACK_REID][0].shape,
(2, 32, 32, _REID_EMBED_SIZE))
self.assertEqual(prediction_dict[cnma.TEMPORAL_OFFSET][0].shape,
(2, 32, 32, 2))
def test_loss(self):
"""Test the loss function."""
......@@ -1361,7 +1430,11 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
groundtruth_dp_surface_coords_list=groundtruth_dict[
fields.BoxListFields.densepose_surface_coords],
groundtruth_track_ids_list=groundtruth_dict[
fields.BoxListFields.track_ids])
fields.BoxListFields.track_ids],
groundtruth_track_match_flags_list=groundtruth_dict[
fields.BoxListFields.track_match_flags],
groundtruth_temporal_offsets_list=groundtruth_dict[
fields.BoxListFields.temporal_offsets])
kernel_initializer = tf.constant_initializer(
[[1, 1, 0], [-1000000, -1000000, 1000000]])
......@@ -1413,6 +1486,9 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.TRACK_REID)])
self.assertGreater(
0.01, loss_dict['%s/%s' % (cnma.LOSS_KEY_PREFIX,
cnma.TEMPORAL_OFFSET)])
@parameterized.parameters(
{'target_class_id': 1},
......@@ -1463,6 +1539,9 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
dtype=np.float32)
track_reid_embedding[0, 16, 16, :] = np.ones(embedding_size)
temporal_offsets = np.zeros((1, 32, 32, 2), dtype=np.float32)
temporal_offsets[..., 1] = 1
class_center = tf.constant(class_center)
height_width = tf.constant(height_width)
offset = tf.constant(offset)
......@@ -1473,6 +1552,7 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
dp_part_heatmap = tf.constant(dp_part_heatmap, dtype=tf.float32)
dp_surf_coords = tf.constant(dp_surf_coords, dtype=tf.float32)
track_reid_embedding = tf.constant(track_reid_embedding, dtype=tf.float32)
temporal_offsets = tf.constant(temporal_offsets, dtype=tf.float32)
prediction_dict = {
cnma.OBJECT_CENTER: [class_center],
......@@ -1487,7 +1567,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
cnma.SEGMENTATION_HEATMAP: [segmentation_heatmap],
cnma.DENSEPOSE_HEATMAP: [dp_part_heatmap],
cnma.DENSEPOSE_REGRESSION: [dp_surf_coords],
cnma.TRACK_REID: [track_reid_embedding]
cnma.TRACK_REID: [track_reid_embedding],
cnma.TEMPORAL_OFFSET: [temporal_offsets],
}
def graph_fn():
......@@ -1519,6 +1600,8 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
detections['detection_masks'].shape)
self.assertAllEqual([1, max_detection, embedding_size],
detections['detection_embeddings'].shape)
self.assertAllEqual([1, max_detection, 2],
detections['detection_temporal_offsets'].shape)
# Masks should be empty for everything but the first detection.
self.assertAllEqual(
......@@ -1632,6 +1715,10 @@ def get_fake_prediction_dict(input_height, input_width, stride):
_REID_EMBED_SIZE), dtype=np.float32)
track_reid_embedding[0, 2, 4, :] = np.arange(_REID_EMBED_SIZE)
temporal_offsets = np.zeros((2, output_height, output_width, 2),
dtype=np.float32)
temporal_offsets[0, 2, 4, :] = 5
prediction_dict = {
'preprocessed_inputs':
tf.zeros((2, input_height, input_width, 3)),
......@@ -1674,7 +1761,11 @@ def get_fake_prediction_dict(input_height, input_width, stride):
cnma.TRACK_REID: [
tf.constant(track_reid_embedding),
tf.constant(track_reid_embedding),
]
],
cnma.TEMPORAL_OFFSET: [
tf.constant(temporal_offsets),
tf.constant(temporal_offsets),
],
}
return prediction_dict
......@@ -1736,6 +1827,14 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
tf.constant([2], dtype=tf.int32),
tf.constant([1], dtype=tf.int32),
]
temporal_offsets = [
tf.constant([[5.0, 5.0]], dtype=tf.float32),
tf.constant([[2.0, 3.0]], dtype=tf.float32),
]
track_match_flags = [
tf.constant([1.0], dtype=tf.float32),
tf.constant([1.0], dtype=tf.float32),
]
groundtruth_dict = {
fields.BoxListFields.boxes: boxes,
fields.BoxListFields.weights: weights,
......@@ -1747,6 +1846,8 @@ def get_fake_groundtruth_dict(input_height, input_width, stride):
fields.BoxListFields.densepose_surface_coords:
densepose_surface_coords,
fields.BoxListFields.track_ids: track_ids,
fields.BoxListFields.temporal_offsets: temporal_offsets,
fields.BoxListFields.track_match_flags: track_match_flags,
fields.InputDataFields.groundtruth_labeled_classes: labeled_classes,
}
return groundtruth_dict
......
......@@ -1286,15 +1286,15 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
metric_names = ['DetectionMasks_Precision/mAP',
'DetectionMasks_Precision/mAP@.50IOU',
'DetectionMasks_Precision/mAP@.75IOU',
'DetectionMasks_Precision/mAP (large)',
'DetectionMasks_Precision/mAP (medium)',
'DetectionMasks_Precision/mAP (small)',
'DetectionMasks_Precision/mAP (medium)',
'DetectionMasks_Precision/mAP (large)',
'DetectionMasks_Recall/AR@1',
'DetectionMasks_Recall/AR@10',
'DetectionMasks_Recall/AR@100',
'DetectionMasks_Recall/AR@100 (large)',
'DetectionMasks_Recall/AR@100 (small)',
'DetectionMasks_Recall/AR@100 (medium)',
'DetectionMasks_Recall/AR@100 (small)']
'DetectionMasks_Recall/AR@100 (large)']
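# Note (editor): the reordering above appears to align the (small)/(medium)/
# (large) metric names with the fixed order in which pycocotools' COCOeval
# reports its stats.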
if self._include_metrics_per_category:
for category_dict in self._categories:
metric_names.append('DetectionMasks_PerformanceByCategory/mAP/' +
......