Commit bb124157 authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

parents 2e9bb539 0edeb7f6
@@ -237,9 +237,12 @@ if tf_version.is_tf1():
frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}
FEATURE_EXTRACTOR_MAPS = [
SSD_FEATURE_EXTRACTOR_CLASS_MAP,
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
CENTER_NET_EXTRACTOR_FUNCTION_MAP
]
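This hunk registers the (empty, under TF1) CenterNet map alongside the SSD and Faster R-CNN maps so that `_check_feature_extractor_exists` can look every configured extractor type up in one place. A hedged, simplified sketch of such a registry check (stand-in entries; the real helper may differ in detail):

```python
SSD_FEATURE_EXTRACTOR_CLASS_MAP = {'ssd_resnet50_v1_fpn': object}  # stand-in
FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {'faster_rcnn_resnet101': object}
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {}

FEATURE_EXTRACTOR_MAPS = [
    SSD_FEATURE_EXTRACTOR_CLASS_MAP,
    FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP,
    CENTER_NET_EXTRACTOR_FUNCTION_MAP,
]


def check_feature_extractor_exists(feature_extractor_type):
  # Search every registered map; raise a clear error for unsupported types.
  if not any(feature_extractor_type in m for m in FEATURE_EXTRACTOR_MAPS):
    raise ValueError(
        '{} is not supported for this TF version.'.format(
            feature_extractor_type))
```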
@@ -996,7 +999,7 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
center_net_config.image_resizer)
_check_feature_extractor_exists(center_net_config.feature_extractor.type)
feature_extractor = _build_center_net_feature_extractor(
center_net_config.feature_extractor)
center_net_config.feature_extractor, is_training)
object_center_params = object_center_proto_to_params(
center_net_config.object_center_params)
@@ -1067,19 +1070,21 @@ def _build_center_net_model(center_net_config, is_training, add_summaries):
non_max_suppression_fn=non_max_suppression_fn)
def _build_center_net_feature_extractor(
feature_extractor_config):
def _build_center_net_feature_extractor(feature_extractor_config, is_training):
"""Build a CenterNet feature extractor from the given config."""
if feature_extractor_config.type not in CENTER_NET_EXTRACTOR_FUNCTION_MAP:
raise ValueError('\'{}\' is not a known CenterNet feature extractor type'
.format(feature_extractor_config.type))
kwargs = {
'channel_means': list(feature_extractor_config.channel_means),
'channel_stds': list(feature_extractor_config.channel_stds),
'bgr_ordering': feature_extractor_config.bgr_ordering,
}
return CENTER_NET_EXTRACTOR_FUNCTION_MAP[feature_extractor_config.type](
channel_means=list(feature_extractor_config.channel_means),
channel_stds=list(feature_extractor_config.channel_stds),
bgr_ordering=feature_extractor_config.bgr_ordering
)
**kwargs)
META_ARCH_BUILDER_MAP = {
......
@@ -33,6 +33,8 @@ from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner as cn_assigner
from object_detection.utils import shape_utils
from object_detection.utils import target_assigner_utils as ta_utils
from object_detection.utils import tf_version
# Number of channels needed to predict size and offsets.
NUM_OFFSET_CHANNELS = 2
@@ -166,16 +168,26 @@ def make_prediction_net(num_out_channels, kernel_size=3, num_filters=256,
else:
conv_fn = tf.keras.layers.Conv2D
out_conv = tf.keras.layers.Conv2D(num_out_channels, kernel_size=1)
# We name the convolution operations explicitly because Keras, by default,
# uses different names during training and evaluation. By setting the names
# here, we avoid unexpected pipeline breakage in TF1.
out_conv = tf.keras.layers.Conv2D(
num_out_channels,
kernel_size=1,
name='conv1' if tf_version.is_tf1() else None)
if bias_fill is not None:
out_conv.bias_initializer = tf.keras.initializers.constant(bias_fill)
net = tf.keras.Sequential(
[conv_fn(num_filters, kernel_size=kernel_size, padding='same'),
tf.keras.layers.ReLU(),
out_conv],
name=name)
net = tf.keras.Sequential([
conv_fn(
num_filters,
kernel_size=kernel_size,
padding='same',
name='conv2' if tf_version.is_tf1() else None),
tf.keras.layers.ReLU(), out_conv
],
name=name)
return net
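The comment in this hunk is about TF1 op-name stability: Keras auto-generates layer names, so the same head can end up with different op names in the training and evaluation graphs. A minimal standalone sketch of the naming pattern, with `is_tf1` standing in for `tf_version.is_tf1()`:

```python
import tensorflow as tf


def tiny_prediction_net(num_out_channels, num_filters=256, is_tf1=False):
  # Fixed layer names keep TF1 graph-mode op names identical between the
  # training and evaluation graphs; TF2 can rely on auto-generated names.
  return tf.keras.Sequential([
      tf.keras.layers.Conv2D(num_filters, kernel_size=3, padding='same',
                             name='conv2' if is_tf1 else None),
      tf.keras.layers.ReLU(),
      tf.keras.layers.Conv2D(num_out_channels, kernel_size=1,
                             name='conv1' if is_tf1 else None),
  ], name='center')


logits = tiny_prediction_net(num_out_channels=90)(tf.zeros([1, 64, 64, 256]))
```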
@@ -2096,6 +2108,21 @@ class CenterNetMetaArch(model.DetectionModel):
'tensor names.')
return self._batched_prediction_tensor_names
def _make_prediction_net_list(self, num_feature_outputs, num_out_channels,
kernel_size=3, num_filters=256, bias_fill=None,
name=None):
prediction_net_list = []
for i in range(num_feature_outputs):
prediction_net_list.append(
make_prediction_net(
num_out_channels,
kernel_size=kernel_size,
num_filters=num_filters,
bias_fill=bias_fill,
use_depthwise=self._use_depthwise,
name='{}_{}'.format(name, i) if name else name))
return prediction_net_list
def _construct_prediction_heads(self, num_classes, num_feature_outputs,
class_prediction_bias_init):
"""Constructs the prediction heads based on the specific parameters.
@@ -2116,86 +2143,72 @@ class CenterNetMetaArch(model.DetectionModel):
learning the tracking task.
"""
prediction_heads = {}
prediction_heads[OBJECT_CENTER] = [
make_prediction_net(num_classes, bias_fill=class_prediction_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[OBJECT_CENTER] = self._make_prediction_net_list(
num_feature_outputs, num_classes, bias_fill=class_prediction_bias_init,
name='center')
if self._od_params is not None:
prediction_heads[BOX_SCALE] = [
make_prediction_net(
NUM_SIZE_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[BOX_OFFSET] = [
make_prediction_net(
NUM_OFFSET_CHANNELS, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[BOX_SCALE] = self._make_prediction_net_list(
num_feature_outputs, NUM_SIZE_CHANNELS, name='box_scale')
prediction_heads[BOX_OFFSET] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='box_offset')
if self._kp_params_dict is not None:
for task_name, kp_params in self._kp_params_dict.items():
num_keypoints = len(kp_params.keypoint_indices)
# pylint: disable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_HEATMAP)] = [
make_prediction_net(
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_HEATMAP)] = self._make_prediction_net_list(
num_feature_outputs,
num_keypoints,
bias_fill=kp_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
prediction_heads[get_keypoint_name(task_name, KEYPOINT_REGRESSION)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
name='kpt_heatmap')
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_REGRESSION)] = self._make_prediction_net_list(
num_feature_outputs,
NUM_OFFSET_CHANNELS * num_keypoints,
name='kpt_regress')
if kp_params.per_keypoint_offset:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS * num_keypoints,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
num_feature_outputs,
NUM_OFFSET_CHANNELS * num_keypoints,
name='kpt_offset')
else:
prediction_heads[get_keypoint_name(task_name, KEYPOINT_OFFSET)] = [
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_OFFSET)] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='kpt_offset')
if kp_params.predict_depth:
num_depth_channel = (
num_keypoints if kp_params.per_keypoint_depth else 1)
prediction_heads[get_keypoint_name(task_name, KEYPOINT_DEPTH)] = [
make_prediction_net(
num_depth_channel, use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: disable=g-complex-comprehension
prediction_heads[get_keypoint_name(
task_name, KEYPOINT_DEPTH)] = self._make_prediction_net_list(
num_feature_outputs, num_depth_channel, name='kpt_depth')
if self._mask_params is not None:
prediction_heads[SEGMENTATION_HEATMAP] = [
make_prediction_net(
num_classes,
bias_fill=self._mask_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[SEGMENTATION_HEATMAP] = self._make_prediction_net_list(
num_feature_outputs,
num_classes,
bias_fill=self._mask_params.heatmap_bias_init,
name='seg_heatmap')
if self._densepose_params is not None:
prediction_heads[DENSEPOSE_HEATMAP] = [
make_prediction_net(
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[DENSEPOSE_REGRESSION] = [
make_prediction_net(2 * self._densepose_params.num_parts,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
# pylint: enable=g-complex-comprehension
prediction_heads[DENSEPOSE_HEATMAP] = self._make_prediction_net_list(
num_feature_outputs,
self._densepose_params.num_parts,
bias_fill=self._densepose_params.heatmap_bias_init,
name='dense_pose_heatmap')
prediction_heads[DENSEPOSE_REGRESSION] = self._make_prediction_net_list(
num_feature_outputs,
2 * self._densepose_params.num_parts,
name='dense_pose_regress')
if self._track_params is not None:
prediction_heads[TRACK_REID] = [
make_prediction_net(self._track_params.reid_embed_size,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)]
prediction_heads[TRACK_REID] = self._make_prediction_net_list(
num_feature_outputs,
self._track_params.reid_embed_size,
name='track_reid')
# Creates a classification network to train object embeddings by learning
# a projection from embedding space to object track ID space.
@@ -2213,11 +2226,8 @@ class CenterNetMetaArch(model.DetectionModel):
input_shape=(
self._track_params.reid_embed_size,)))
if self._temporal_offset_params is not None:
prediction_heads[TEMPORAL_OFFSET] = [
make_prediction_net(NUM_OFFSET_CHANNELS,
use_depthwise=self._use_depthwise)
for _ in range(num_feature_outputs)
]
prediction_heads[TEMPORAL_OFFSET] = self._make_prediction_net_list(
num_feature_outputs, NUM_OFFSET_CHANNELS, name='temporal_offset')
return prediction_heads
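As a point of orientation, `_make_prediction_net_list` builds one small convolutional head per backbone feature output, so applying a head group is a per-level loop. A hedged sketch with made-up shapes and names:

```python
import tensorflow as tf

# Two hypothetical backbone outputs (e.g. two hourglass stacks).
feature_maps = [tf.random.normal([2, 128, 128, 64]),
                tf.random.normal([2, 128, 128, 64])]
num_classes = 90
center_heads = [tf.keras.layers.Conv2D(num_classes, 1, name='center_%d' % i)
                for i in range(len(feature_maps))]
# One prediction per feature output, mirroring prediction_heads[OBJECT_CENTER].
center_logits = [head(fmap) for head, fmap in zip(center_heads, feature_maps)]
```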
def _initialize_target_assigners(self, stride, min_box_overlap_iou):
@@ -3524,6 +3534,37 @@ class CenterNetMetaArch(model.DetectionModel):
return embeddings
def _scatter_keypoints_to_batch(self, num_ind, kpt_coords_for_example,
kpt_scores_for_example,
instance_inds_for_example, max_detections,
total_num_keypoints):
"""Helper function to convert scattered keypoints into batch."""
def left_fn(kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example):
# Scatter into tensor where instances align with original detection
# instances. New shape of keypoint coordinates and scores are
# [1, max_detections, num_total_keypoints, 2] and
# [1, max_detections, num_total_keypoints], respectively.
return _pad_to_full_instance_dim(
kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example,
self._center_params.max_box_predictions)
def right_fn():
kpt_coords_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
kpt_scores_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints], dtype=tf.float32)
return (kpt_coords_for_example_all_det,
kpt_scores_for_example_all_det)
left_fn = functools.partial(left_fn, kpt_coords_for_example,
kpt_scores_for_example,
instance_inds_for_example)
# Use dimension values instead of tf.size for tf.lite compatibility.
return tf.cond(num_ind[0] > 0, left_fn, right_fn)
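The helper above binds the data-dependent tensors with `functools.partial` so that both `tf.cond` branches are zero-argument callables returning identically shaped tensors. A self-contained sketch of that pattern (hypothetical shapes, with a trivial stand-in for the real scatter):

```python
import functools

import tensorflow as tf


def scatter_or_zeros(num_ind, coords, max_detections=20, num_keypoints=17):

  def left_fn(coords):
    # Stand-in for the real scatter: broadcast the mean into the padded shape.
    padded = tf.zeros([1, max_detections, num_keypoints, 2], tf.float32)
    return padded + tf.reduce_mean(coords)

  def right_fn():
    return tf.zeros([1, max_detections, num_keypoints, 2], tf.float32)

  # Bind the data-dependent argument so both branches take no arguments.
  return tf.cond(num_ind > 0, functools.partial(left_fn, coords), right_fn)


out = scatter_or_zeros(tf.constant(3), tf.random.normal([3, 17, 2]))
```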
def _postprocess_keypoints_multi_class(self, prediction_dict, classes,
y_indices, x_indices, boxes,
num_detections):
@@ -3630,26 +3671,13 @@ class CenterNetMetaArch(model.DetectionModel):
instance_inds_for_example = tf.concat(instance_inds_for_class_list,
axis=0)
# Use dimension values instead of tf.size for tf.lite compatibility.
num_inds = _get_shape(instance_inds_for_example, 1)
if num_inds[0] > 0:
# Scatter into tensor where instances align with original detection
# instances. New shape of keypoint coordinates and scores are
# [1, max_detections, num_total_keypoints, 2] and
# [1, max_detections, num_total_keypoints], respectively.
kpt_coords_for_example_all_det, kpt_scores_for_example_all_det = (
_pad_to_full_instance_dim(
kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example,
self._center_params.max_box_predictions))
else:
kpt_coords_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints, 2], dtype=tf.float32)
kpt_scores_for_example_all_det = tf.zeros(
[1, max_detections, total_num_keypoints], dtype=tf.float32)
(kpt_coords_for_example_all_det,
kpt_scores_for_example_all_det) = self._scatter_keypoints_to_batch(
num_ind, kpt_coords_for_example, kpt_scores_for_example,
instance_inds_for_example, max_detections, total_num_keypoints)
kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
kpt_coords_for_example_list.append(kpt_coords_for_example_all_det)
kpt_scores_for_example_list.append(kpt_scores_for_example_all_det)
# Concatenate all keypoints and scores from all examples in the batch.
# Shapes are [batch_size, max_detections, num_total_keypoints, 2] and
@@ -3951,5 +3979,13 @@ class CenterNetMetaArch(model.DetectionModel):
fine_tune_checkpoint_type)}
def updates(self):
raise RuntimeError('This model is intended to be used with model_lib_v2 '
'which does not support updates()')
if tf_version.is_tf2():
raise RuntimeError('This model is intended to be used with model_lib_v2 '
'which does not support updates()')
else:
update_ops = []
slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# Copy the slim ops to avoid modifying the collection
if slim_update_ops:
update_ops.extend(slim_update_ops)
return update_ops
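The TF1 branch above returns a copy of the `UPDATE_OPS` collection, which is where graph-mode batch normalization registers its moving-average updates; callers are then expected to run those ops together with the train op. A hedged TF1 graph-mode sketch of that contract:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()

images = tf.placeholder(tf.float32, [None, 8, 8, 3])
# tf.layers.batch_normalization registers its moving-average update ops in
# tf.GraphKeys.UPDATE_OPS while the graph is being built.
net = tf.layers.batch_normalization(images, training=True)
loss = tf.reduce_mean(net)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
```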
@@ -795,7 +795,8 @@ def eager_eval_loop(
eval_dataset,
use_tpu=False,
postprocess_on_cpu=False,
global_step=None):
global_step=None,
):
"""Evaluate the model eagerly on the evaluation dataset.
This method will compute the evaluation metrics specified in the configs on
@@ -968,11 +969,10 @@ def eager_eval_loop(
eval_metrics[loss_key] = tf.reduce_mean(loss_metrics[loss_key])
eval_metrics = {str(k): v for k, v in eval_metrics.items()}
tf.logging.info('Eval metrics at step %d', global_step)
tf.logging.info('Eval metrics at step %d', global_step.numpy())
for k in eval_metrics:
tf.compat.v2.summary.scalar(k, eval_metrics[k], step=global_step)
tf.logging.info('\t+ %s: %f', k, eval_metrics[k])
return eval_metrics
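The `tf.compat.v2.summary.scalar` calls above write against whichever default summary writer is active for the current step. A minimal TF2 sketch of that mechanism (hypothetical log directory and metric values):

```python
import tensorflow as tf

writer = tf.summary.create_file_writer('/tmp/eval_logs')  # hypothetical path
global_step = tf.Variable(1000, dtype=tf.int64)
eval_metrics = {'Loss/total_loss': 0.42}  # made-up value

with writer.as_default():
  for name, value in eval_metrics.items():
    tf.summary.scalar(name, value, step=global_step)
```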
@@ -1026,6 +1026,8 @@ def eval_continuously(
"""
get_configs_from_pipeline_file = MODEL_BUILD_UTIL_MAP[
'get_configs_from_pipeline_file']
create_pipeline_proto_from_configs = MODEL_BUILD_UTIL_MAP[
'create_pipeline_proto_from_configs']
merge_external_params_with_configs = MODEL_BUILD_UTIL_MAP[
'merge_external_params_with_configs']
@@ -1043,6 +1045,10 @@ def eval_continuously(
'Forced number of epochs for all eval validations to be 1.')
configs = merge_external_params_with_configs(
configs, None, kwargs_dict=kwargs)
if model_dir:
pipeline_config_final = create_pipeline_proto_from_configs(configs)
config_util.save_pipeline_config(pipeline_config_final, model_dir)
model_config = configs['model']
train_input_config = configs['train_input_config']
eval_config = configs['eval_config']
@@ -1109,4 +1115,5 @@ def eval_continuously(
eval_input,
use_tpu=use_tpu,
postprocess_on_cpu=postprocess_on_cpu,
global_step=global_step)
global_step=global_step,
)
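The `save_pipeline_config` call added to `eval_continuously` above writes the merged configs back to `model_dir`. A hedged sketch of what that amounts to, assuming the standard protobuf text-format serialization (the helper name below is hypothetical):

```python
import os

from google.protobuf import text_format


def save_config_text(pipeline_config, directory, filename='pipeline.config'):
  # Serialize the merged pipeline proto as human-readable text next to the
  # checkpoints so the exact evaluation configuration is reproducible.
  os.makedirs(directory, exist_ok=True)
  with open(os.path.join(directory, filename), 'w') as f:
    f.write(text_format.MessageToString(pipeline_config))
```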
@@ -305,7 +305,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
# this net must be appended only once it's been filled with layers
self.convolutions.append(net)
def call(self, image_features, training=None):
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
@@ -313,11 +313,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
Args:
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
training: A boolean, True when in training mode. If not specified,
defaults to (in order of priority): the training mode of the outer
`Layer.call`; the default mode set by
`tf.keras.backend.set_learning_phase`; or the default value for
`training` in the call signature.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
@@ -333,7 +328,7 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
else:
feature_map = feature_maps[-1]
for layer in self.convolutions[index]:
feature_map = layer(feature_map, training=training)
feature_map = layer(feature_map)
layer_name = self.convolutions[index][-1].name
feature_map_keys.append(layer_name)
feature_maps.append(feature_map)
@@ -621,7 +616,7 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
self.reshape_blocks.append(reshaped_residual)
self.conv_layers.append(conv_net)
def call(self, image_features, training=None):
def call(self, image_features):
"""Generate the multi-resolution feature maps.
Executed when calling the `.__call__` method on input.
@@ -630,11 +625,6 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
image_features: list of tuples of (tensor_name, image_feature_tensor).
Spatial resolutions of successive tensors must reduce exactly by a factor
of 2.
training: A boolean, True when in training mode. If not specified,
defaults to (in order of priority): the training mode of the outer
`Layer.call`; the default mode set by
`tf.keras.backend.set_learning_phase`; or the default value for
`training` in the call signature.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
@@ -646,7 +636,7 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
with tf.name_scope(self.scope):
top_down = image_features[-1][1]
for layer in self.top_layers:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append('top_down_%s' % image_features[-1][0])
@@ -655,14 +645,14 @@ class KerasFpnTopDownFeatureMaps(tf.keras.Model):
residual = image_features[level][1]
top_down = output_feature_maps_list[-1]
for layer in self.residual_blocks[index]:
residual = layer(residual, training=training)
residual = layer(residual)
for layer in self.top_down_blocks[index]:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
for layer in self.reshape_blocks[index]:
top_down = layer([residual, top_down], training=training)
top_down = layer([residual, top_down])
top_down += residual
for layer in self.conv_layers[index]:
top_down = layer(top_down, training=training)
top_down = layer(top_down)
output_feature_maps_list.append(top_down)
output_feature_map_keys.append('top_down_%s' % image_features[level][0])
return collections.OrderedDict(reversed(
......
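Dropping the explicit `training` plumbing in these `call` methods leans on Keras's training-mode propagation: a sublayer invoked without a `training` argument inherits the mode of the enclosing call. A small sketch of that behavior (hypothetical layer names):

```python
import tensorflow as tf


class TinyBlock(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.bn = tf.keras.layers.BatchNormalization()

  def call(self, x, training=None):
    # No `training=` is passed to the sublayer; Keras propagates the mode of
    # the enclosing call, so batch norm switches behavior automatically.
    return self.bn(x)


block = TinyBlock()
x = tf.random.normal([4, 8])
train_out = block(x, training=True)   # uses batch statistics
eval_out = block(x, training=False)   # uses moving averages
```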
@@ -197,7 +197,7 @@ class ConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
# Apply shared conv layers before the head predictors.
for layer in self._shared_nets[index]:
net = layer(net, training=self._is_training)
net = layer(net)
for head_name in self._sorted_head_names:
head_obj = self._prediction_heads[head_name][index]
@@ -458,13 +458,13 @@ class WeightSharedConvolutionalBoxPredictor(box_predictor.KerasBoxPredictor):
def _apply_layers(base_tower_layers, image_feature):
for layer in base_tower_layers:
image_feature = layer(image_feature, training=self._is_training)
image_feature = layer(image_feature)
return image_feature
for (index, image_feature) in enumerate(image_features):
# Apply additional projection layers to image features
for layer in self._additional_projection_layers[index]:
image_feature = layer(image_feature, training=self._is_training)
image_feature = layer(image_feature)
# Apply box tower layers.
box_tower_feature = _apply_layers(
......
@@ -334,4 +334,6 @@ message CenterNetFeatureExtractor {
// separable convolutions. This is typically applied to feature pyramid
// network if any.
optional bool use_depthwise = 5 [default = false];
}