Commit aa7dbd59 authored by A. Unique TensorFlower, committed by TF Object Detection Team

INTERNAL_CHANGE

PiperOrigin-RevId: 361897677
parent 7eacbc87
object_detection/builders/model_builder.py

@@ -237,9 +237,12 @@ if tf_version.is_tf1():
          frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
   }
+  CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}
   FEATURE_EXTRACTOR_MAPS = [
       SSD_FEATURE_EXTRACTOR_CLASS_MAP,
-      FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
+      FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
+      CENTER_NET_EXTRACTOR_FUNCTION_MAP
   ]
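Note: registering an empty CENTER_NET_EXTRACTOR_FUNCTION_MAP under TF1 lets the existence check treat every meta-architecture uniformly, even though CenterNet extractors are only populated in TF2. A minimal sketch of that lookup pattern, assuming a hypothetical check_feature_extractor_exists analogous to the library's _check_feature_extractor_exists (maps and type names here are illustrative):

# Illustrative stand-ins for the registration maps above.
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {'ssd_resnet50_v1_fpn': object}
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {'faster_rcnn_resnet101': object}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}  # Empty in TF1; CenterNet is TF2-only.

FEATURE_EXTRACTOR_MAPS = [
    SSD_FEATURE_EXTRACTOR_CLASS_MAP,
    FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
    CENTER_NET_EXTRACTOR_FUNCTION_MAP,
]


def check_feature_extractor_exists(feature_extractor_type):
  # Union over every registered map; an empty map contributes nothing but
  # keeps the lookup code identical across TF1 and TF2.
  known_types = set()
  for extractor_map in FEATURE_EXTRACTOR_MAPS:
    known_types.update(extractor_map)
  if feature_extractor_type not in known_types:
    raise ValueError(
        '{} is not a known feature extractor.'.format(feature_extractor_type))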
@@ -996,7 +999,7 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
       center_net_config.image_resizer)
   _check_feature_extractor_exists(center_net_config.feature_extractor.type)
   feature_extractor = _build_center_net_feature_extractor(
-      center_net_config.feature_extractor)
+      center_net_config.feature_extractor, is_training)
   object_center_params = object_center_proto_to_params(
       center_net_config.object_center_params)
@@ -1067,19 +1070,21 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
       non_max_suppression_fn=non_max_suppression_fn)


-def _build_center_net_feature_extractor(
-    feature_extractor_config):
+def _build_center_net_feature_extractor(feature_extractor_config, is_training):
   """Build a CenterNet feature extractor from the given config."""
   if feature_extractor_config.type not in CENTER_NET_EXTRACTOR_FUNCTION_MAP:
     raise ValueError('\'{}\' is not a known CenterNet feature extractor type'
                      .format(feature_extractor_config.type))
+  kwargs = {
+      'channel_means': list(feature_extractor_config.channel_means),
+      'channel_stds': list(feature_extractor_config.channel_stds),
+      'bgr_ordering': feature_extractor_config.bgr_ordering,
+  }

   return CENTER_NET_EXTRACTOR_FUNCTION_MAP[feature_extractor_config.type](
-      channel_means=list(feature_extractor_config.channel_means),
-      channel_stds=list(feature_extractor_config.channel_stds),
-      bgr_ordering=feature_extractor_config.bgr_ordering
-  )
+      **kwargs)


 META_ARCH_BUILDER_MAP = {
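Note: collecting the constructor arguments into a kwargs dict before the single **kwargs call makes it easy to append conditional arguments later, for example ones that only some extractors accept. A toy sketch of the pattern with a stand-in config and extractor (SimpleNamespace and dummy_extractor are placeholders, not library code; field names follow the diff above):

from types import SimpleNamespace

# Stand-in for the CenterNetFeatureExtractor proto; values are made up.
config = SimpleNamespace(
    channel_means=[104.01, 114.03, 119.92],
    channel_stds=[73.6, 69.89, 70.92],
    bgr_ordering=True)

kwargs = {
    'channel_means': list(config.channel_means),
    'channel_stds': list(config.channel_stds),
    'bgr_ordering': config.bgr_ordering,
}


def dummy_extractor(channel_means, channel_stds, bgr_ordering):
  # Placeholder for the real feature extractor factory.
  return (channel_means, channel_stds, bgr_ordering)

print(dummy_extractor(**kwargs))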
object_detection/meta_architectures/center_net_meta_arch.py

@@ -33,6 +33,8 @@ from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner as cn_assigner
 from object_detection.utils import shape_utils
 from object_detection.utils import target_assigner_utils as ta_utils
+from object_detection.utils import tf_version
+

 # Number of channels needed to predict size and offsets.
 NUM_OFFSET_CHANNELS = 2
@@ -166,15 +168,25 @@ def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
   else:
     conv_fn = tf.keras.layers.Conv2D

-  out_conv = tf.keras.layers.Conv2D(num_out_channels, kernel_size=1)
+  # We name the convolution operations explicitly because Keras, by default,
+  # uses different names during training and evaluation. By setting the names
+  # here, we avoid unexpected pipeline breakage in TF1.
+  out_conv = tf.keras.layers.Conv2D(
+      num_out_channels,
+      kernel_size=1,
+      name='conv1' if tf_version.is_tf1() else None)

   if bias_fill is not None:
     out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)

-  net = tf.keras.Sequential(
-      [conv_fn(num_filters, kernel_size=kernel_size, padding='same'),
-       tf.keras.layers.ReLU(),
-       out_conv],
-      name=name)
+  net = tf.keras.Sequential([
+      conv_fn(
+          num_filters,
+          kernel_size=kernel_size,
+          padding='same',
+          name='conv2' if tf_version.is_tf1() else None),
+      tf.keras.layers.ReLU(), out_conv
+  ],
+                            name=name)

   return net
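Note: the motivation for the explicit names is that Keras assigns auto-generated layer names ('conv2d', 'conv2d_1', ...) from a global counter, so building the same head twice, as separate TF1 train and eval graphs do, can yield mismatched variable names. A small demonstration of the default behavior:

import tensorflow as tf

# Each anonymous layer takes the next name from a global counter, so a second
# build of the same head gets different names than the first.
first = tf.keras.layers.Conv2D(8, kernel_size=1)
second = tf.keras.layers.Conv2D(8, kernel_size=1)
print(first.name, second.name)  # e.g. 'conv2d' 'conv2d_1'

# Pinning the name makes every build agree, which keeps TF1 checkpoints and
# eval graphs compatible.
pinned = tf.keras.layers.Conv2D(8, kernel_size=1, name='conv1')
print(pinned.name)  # 'conv1'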
@@ -2096,6 +2108,21 @@ class CenterNetMetaArch(model.DetectionModel):
                          'tensor names.')
     return self._batched_prediction_tensor_names

+  def _make_prediction_net_list(self, num_feature_outputs, num_out_channels,
+                                kernel_size=3, num_filters=256, bias_fill=None,
+                                name=None):
+    prediction_net_list = []
+    for i in range(num_feature_outputs):
+      prediction_net_list.append(
+          make_prediction_net(
+              num_out_channels,
+              kernel_size=kernel_size,
+              num_filters=num_filters,
+              bias_fill=bias_fill,
+              use_depthwise=self._use_depthwise,
+              name='{}_{}'.format(name, i) if name else name))
+    return prediction_net_list
+
   def _construct_prediction_heads(self, num_classes, num_feature_outputs,
                                   class_prediction_bias_init):
     """Constructs the prediction heads based on the specific parameters.
@@ -2116,86 +2143,72 @@ class CenterNetMetaArch(model.DetectionModel):
         learning the tracking task.
     """
     prediction_heads = {}
-    prediction_heads[OBJECT_CENTER] = [
-        make_prediction_net(num_classes, bias_fill=class_prediction_bias_init,
-                            use_depthwise=self._use_depthwise)
-        for _ in range(num_feature_outputs)
-    ]
+    prediction_heads[OBJECT_CENTER] = self._make_prediction_net_list(
+        num_feature_outputs, num_classes, bias_fill=class_prediction_bias_init,
+        name='center')
     if self._od_params is not None:
-      prediction_heads[BOX_SCALE] = [
-          make_prediction_net(
-              NUM_SIZE_CHANNELS, use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)
-      ]
-      prediction_heads[BOX_OFFSET] = [
-          make_prediction_net(
-              NUM_OFFSET_CHANNELS, use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)
-      ]
+      prediction_heads[BOX_SCALE] = self._make_prediction_net_list(
+          num_feature_outputs, NUM_SIZE_CHANNELS, name='box_scale')
+      prediction_heads[BOX_OFFSET] = self._make_prediction_net_list(
+          num_feature_outputs, NUM_OFFSET_CHANNELS, name='box_offset')
     if self._kp_params_dict is not None:
       for task_name, kp_params in self._kp_params_dict.items():
         num_keypoints = len(kp_params.keypoint_indices)
-        # pylint: disable=g-complex-comprehension
-        prediction_heads[get_keypoint_name(task_name, KEYPOINT_HEATMAP)] = [
-            make_prediction_net(
-                num_keypoints,
-                bias_fill=kp_params.heatmap_bias_init,
-                use_depthwise=self._use_depthwise)
-            for _ in range(num_feature_outputs)
-        ]
-        # pylint: enable=g-complex-comprehension
-        prediction_heads[get_keypoint_name(task_name, KEYPOINT_REGRESSION)] = [
-            make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
-                                use_depthwise=self._use_depthwise)
-            for _ in range(num_feature_outputs)
-        ]
+        prediction_heads[get_keypoint_name(
+            task_name, KEYPOINT_HEATMAP)] = self._make_prediction_net_list(
+                num_feature_outputs,
+                num_keypoints,
+                bias_fill=kp_params.heatmap_bias_init,
+                name='kpt_heatmap')
+        prediction_heads[get_keypoint_name(
+            task_name, KEYPOINT_REGRESSION)] = self._make_prediction_net_list(
+                num_feature_outputs,
+                NUM_OFFSET_CHANNELS * num_keypoints,
+                name='kpt_regress')
         if kp_params.per_keypoint_offset:
-          prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
-              make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
-                                  use_depthwise=self._use_depthwise)
-              for _ in range(num_feature_outputs)
-          ]
+          prediction_heads[get_keypoint_name(
+              task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
+                  num_feature_outputs,
+                  NUM_OFFSET_CHANNELS * num_keypoints,
+                  name='kpt_offset')
         else:
-          prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
-              make_prediction_net(NUM_OFFSET_CHANNELS,
-                                  use_depthwise=self._use_depthwise)
-              for _ in range(num_feature_outputs)
-          ]
+          prediction_heads[get_keypoint_name(
+              task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
+                  num_feature_outputs, NUM_OFFSET_CHANNELS, name='kpt_offset')
         if kp_params.predict_depth:
           num_depth_channel = (
               num_keypoints if kp_params.per_keypoint_depth else 1)
-          prediction_heads[get_keypoint_name(task_name, KEYPOINT_DEPTH)] = [
-              make_prediction_net(
-                  num_depth_channel, use_depthwise=self._use_depthwise)
-              for _ in range(num_feature_outputs)
-          ]
-    # pylint: disable=g-complex-comprehension
+          prediction_heads[get_keypoint_name(
+              task_name, KEYPOINT_DEPTH)] = self._make_prediction_net_list(
+                  num_feature_outputs, num_depth_channel, name='kpt_depth')
     if self._mask_params is not None:
-      prediction_heads[SEGMENTATION_HEATMAP] = [
-          make_prediction_net(
-              num_classes,
-              bias_fill=self._mask_params.heatmap_bias_init,
-              use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)]
+      prediction_heads[SEGMENTATION_HEATMAP] = self._make_prediction_net_list(
+          num_feature_outputs,
+          num_classes,
+          bias_fill=self._mask_params.heatmap_bias_init,
+          name='seg_heatmap')
     if self._densepose_params is not None:
-      prediction_heads[DENSEPOSE_HEATMAP] = [
-          make_prediction_net(
-              self._densepose_params.num_parts,
-              bias_fill=self._densepose_params.heatmap_bias_init,
-              use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)]
-      prediction_heads[DENSEPOSE_REGRESSION] = [
-          make_prediction_net(2 * self._densepose_params.num_parts,
-                              use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)
-      ]
-    # pylint: enable=g-complex-comprehension
+      prediction_heads[DENSEPOSE_HEATMAP] = self._make_prediction_net_list(
+          num_feature_outputs,
+          self._densepose_params.num_parts,
+          bias_fill=self._densepose_params.heatmap_bias_init,
+          name='dense_pose_heatmap')
+      prediction_heads[DENSEPOSE_REGRESSION] = self._make_prediction_net_list(
+          num_feature_outputs,
+          2 * self._densepose_params.num_parts,
+          name='dense_pose_regress')
     if self._track_params is not None:
-      prediction_heads[TRACK_REID] = [
-          make_prediction_net(self._track_params.reid_embed_size,
-                              use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)]
+      prediction_heads[TRACK_REID] = self._make_prediction_net_list(
+          num_feature_outputs,
+          self._track_params.reid_embed_size,
+          name='track_reid')
       # Creates a classification network to train object embeddings by learning
       # a projection from embedding space to object track ID space.
@@ -2213,11 +2226,8 @@ class CenterNetMetaArch(model.DetectionModel):
               input_shape=(
                   self._track_params.reid_embed_size,)))
     if self._temporal_offset_params is not None:
-      prediction_heads[TEMPORAL_OFFSET] = [
-          make_prediction_net(NUM_OFFSET_CHANNELS,
-                              use_depthwise=self._use_depthwise)
-          for _ in range(num_feature_outputs)
-      ]
+      prediction_heads[TEMPORAL_OFFSET] = self._make_prediction_net_list(
+          num_feature_outputs, NUM_OFFSET_CHANNELS, name='temporal_offset')
     return prediction_heads

   def _initialize_target_assigners(self, stride, min_box_overlap_iou):
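Note: the new _make_prediction_net_list helper replaces the repeated list comprehensions in the hunk above and gives each head a stable, indexed name. A standalone sketch of its behavior (make_head_list is a hypothetical analogue, not the library function):

import tensorflow as tf


def make_head_list(num_feature_outputs, build_head, name=None):
  # Build one prediction head per feature pyramid output, naming them
  # 'name_0', 'name_1', ... when a base name is given.
  return [
      build_head(name='{}_{}'.format(name, i) if name else name)
      for i in range(num_feature_outputs)
  ]

heads = make_head_list(
    3, lambda name: tf.keras.layers.Conv2D(90, kernel_size=1, name=name),
    name='center')
print([head.name for head in heads])  # ['center_0', 'center_1', 'center_2']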
@@ -3524,6 +3534,37 @@ class CenterNetMetaArch(model.DetectionModel):
     return embeddings

+  def _scatter_keypoints_to_batch(self, num_ind, kpt_coords_for_example,
+                                  kpt_scores_for_example,
+                                  instance_inds_for_example, max_detections,
+                                  total_num_keypoints):
+    """Helper function to convert scattered keypoints into batch."""
+
+    def left_fn(kpt_coords_for_example, kpt_scores_for_example,
+                instance_inds_for_example):
+      # Scatter into tensor where instances align with original detection
+      # instances. New shape of keypoint coordinates and scores are
+      # [1, max_detections, num_total_keypoints, 2] and
+      # [1, max_detections, num_total_keypoints], respectively.
+      return _pad_to_full_instance_dim(
+          kpt_coords_for_example, kpt_scores_for_example,
+          instance_inds_for_example,
+          self._center_params.max_box_predictions)
+
+    def right_fn():
+      kpt_coords_for_example_all_det = tf.zeros(
+          [1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
+      kpt_scores_for_example_all_det = tf.zeros(
+          [1, max_detections, total_num_keypoints], dtype=tf.float32)
+      return (kpt_coords_for_example_all_det,
+              kpt_scores_for_example_all_det)
+
+    left_fn = functools.partial(left_fn, kpt_coords_for_example,
+                                kpt_scores_for_example,
+                                instance_inds_for_example)
+    # Use dimension values instead of tf.size for tf.lite compatibility.
+    return tf.cond(num_ind[0] > 0, left_fn, right_fn)
+
   def _postprocess_keypoints_multi_class(self, prediction_dict, classes,
                                          y_indices, x_indices, boxes,
                                          num_detections):
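Note: tf.cond expects zero-argument branch callables, which is why left_fn is bound with functools.partial, and comparing a dimension value rather than tf.size keeps the graph TFLite-convertible. A self-contained sketch of the same control-flow pattern (pad_or_zeros and its padding logic are illustrative only):

import functools

import tensorflow as tf


def pad_or_zeros(num_ind, coords, max_detections, num_keypoints):
  def left_fn(coords):
    # Stand-in for the real scatter: pad the instance dimension so both
    # branches return the same [1, max_detections, num_keypoints, 2] shape.
    pad = max_detections - tf.shape(coords)[0]
    return tf.pad(coords, [[0, pad], [0, 0], [0, 0]])[tf.newaxis]

  def right_fn():
    return tf.zeros([1, max_detections, num_keypoints, 2], tf.float32)

  # Both branches must be callables taking no arguments.
  return tf.cond(num_ind[0] > 0, functools.partial(left_fn, coords), right_fn)

coords = tf.random.uniform([3, 17, 2])
out = pad_or_zeros(tf.constant([3]), coords, max_detections=10,
                   num_keypoints=17)
print(out.shape)  # (1, 10, 17, 2)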
@@ -3630,23 +3671,10 @@ class CenterNetMetaArch(model.DetectionModel):
         instance_inds_for_example = tf.concat(instance_inds_for_class_list,
                                               axis=0)

-        # Use dimension values instead of tf.size for tf.lite compatibility.
-        num_inds = _get_shape(instance_inds_for_example, 1)
-        if num_inds[0] > 0:
-          # Scatter into tensor where instances align with original detection
-          # instances. New shape of keypoint coordinates and scores are
-          # [1, max_detections, num_total_keypoints, 2] and
-          # [1, max_detections, num_total_keypoints], respectively.
-          kpt_coords_for_example_all_det, kpt_scores_for_example_all_det = (
-              _pad_to_full_instance_dim(
-                  kpt_coords_for_example, kpt_scores_for_example,
-                  instance_inds_for_example,
-                  self._center_params.max_box_predictions))
-        else:
-          kpt_coords_for_example_all_det = tf.zeros(
-              [1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
-          kpt_scores_for_example_all_det = tf.zeros(
-              [1, max_detections, total_num_keypoints], dtype=tf.float32)
+        (kpt_coords_for_example_all_det,
+         kpt_scores_for_example_all_det) = self._scatter_keypoints_to_batch(
+             num_ind, kpt_coords_for_example, kpt_scores_for_example,
+             instance_inds_for_example, max_detections, total_num_keypoints)

         kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
         kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
@@ -3951,5 +3979,13 @@ class CenterNetMetaArch(model.DetectionModel):
                 fine_tune_checkpoint_type)}

   def updates(self):
-    raise RuntimeError('This model is intended to be used with model_lib_v2 '
-                       'which does not support updates()')
+    if tf_version.is_tf2():
+      raise RuntimeError('This model is intended to be used with model_lib_v2 '
+                         'which does not support updates()')
+    else:
+      update_ops = []
+      slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+      # Copy the slim ops to avoid modifying the collection
+      if slim_update_ops:
+        update_ops.extend(slim_update_ops)
+      return update_ops
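Note: the TF1 branch matters because slim and Keras batch-norm layers register their moving-average updates in the UPDATE_OPS collection, and a TF1 training loop only runs them if they are wired into the train op. A minimal TF1 sketch of that contract (the placeholder network and loss are made up for illustration):

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

x = tf.placeholder(tf.float32, [None, 4])
# Batch norm adds its moving mean/variance updates to UPDATE_OPS instead of
# running them automatically.
net = tf.layers.batch_normalization(x, training=True)
loss = tf.reduce_mean(tf.square(net))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  # The collected updates now run as a side effect of every training step.
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)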
object_detection/protos/center_net.proto

@@ -334,4 +334,6 @@ message CenterNetFeatureExtractor {
   // separable convolutions. This is typically applied to feature pyramid
   // network if any.
   optional bool use_depthwise = 5 [default = false];
 }
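Note: these fields are populated from the feature_extractor block of a pipeline config, which model_builder reads in the hunks above. A hedged sketch of such a block (the type string and numeric values are illustrative; the type must be a key registered in CENTER_NET_EXTRACTOR_FUNCTION_MAP):

feature_extractor {
  type: "hourglass_104"
  channel_means: 104.01
  channel_means: 114.03
  channel_means: 119.92
  channel_stds: 73.6
  channel_stds: 69.89
  channel_stds: 70.92
  bgr_ordering: true
  use_depthwise: false
}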