Commit 5a2cf36f authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

Merge remote-tracking branch 'upstream/master' into newavarecords

parents 258ddfc3 a829e648
......@@ -20,8 +20,8 @@ from __future__ import print_function
import functools
import unittest
from unittest import mock # pylint: disable=g-importing-member
from absl.testing import parameterized
import mock
import tensorflow.compat.v1 as tf
import tf_slim as slim
......@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -109,7 +109,6 @@ class FakeFasterRCNNKerasFeatureExtractor(
])
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
def _get_model(self, box_predictor, **common_kwargs):
......@@ -363,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......@@ -439,15 +439,16 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
masks_are_class_agnostic=masks_are_class_agnostic,
share_box_across_classes=share_box_across_classes), **common_kwargs)
@unittest.skipIf(tf_version.is_tf2(), 'Skipping TF1.X only test.')
@mock.patch.object(context_rcnn_meta_arch, 'context_rcnn_lib')
def test_prediction_mock(self, mock_context_rcnn_lib):
"""Mocks the context_rcnn_lib module to test the prediction.
def test_prediction_mock_tf1(self, mock_context_rcnn_lib_v1):
"""Mocks the context_rcnn_lib_v1 module to test the prediction.
Using mock object so that we can ensure compute_box_context_attention is
called in side the prediction function.
Args:
mock_context_rcnn_lib: mock module for the context_rcnn_lib.
mock_context_rcnn_lib_v1: mock module for the context_rcnn_lib_v1.
"""
model = self._build_model(
is_training=False,
......@@ -456,7 +457,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
num_classes=42)
mock_tensor = tf.ones([2, 8, 3, 3, 3], tf.float32)
mock_context_rcnn_lib.compute_box_context_attention.return_value = mock_tensor
mock_context_rcnn_lib_v1.compute_box_context_attention.return_value = mock_tensor
inputs_shape = (2, 20, 20, 3)
inputs = tf.cast(
tf.random_uniform(inputs_shape, minval=0, maxval=255, dtype=tf.int32),
......@@ -478,7 +479,7 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
side_inputs = model.get_side_inputs(features)
_ = model.predict(preprocessed_inputs, true_image_shapes, **side_inputs)
mock_context_rcnn_lib.compute_box_context_attention.assert_called_once()
mock_context_rcnn_lib_v1.compute_box_context_attention.assert_called_once()
@parameterized.named_parameters(
{'testcase_name': 'static_shapes', 'static_shapes': True},
......@@ -517,7 +518,6 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
}
side_inputs = model.get_side_inputs(features)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes,
**side_inputs)
return (prediction_dict['rpn_box_predictor_features'],
......
......@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop = [features_to_crop]
num_levels = len(features_to_crop)
box_levels = None
if num_levels != 1:
# If there are multiple levels to select, get the box levels
box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
1.0/224, proposal_boxes_normalized)
cropped_regions = self._flatten_first_two_dimensions(
self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
features_to_crop, proposal_boxes_normalized, box_levels,
[self._initial_crop_size, self._initial_crop_size]))
return self._maxpool_layer(cropped_regions)
......@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
image_shape[1], image_shape[2], check_range=False).get()
flat_cropped_gt_mask = self._crop_and_resize_fn(
tf.expand_dims(flat_gt_masks, -1),
tf.expand_dims(flat_normalized_proposals, axis=1),
[tf.expand_dims(flat_gt_masks, -1)],
tf.expand_dims(flat_normalized_proposals, axis=1), None,
[mask_height, mask_width])
# Without stopping gradients into cropped groundtruth masks the
# performance with 100-padded groundtruth masks when batch size > 1 is
......@@ -2547,7 +2554,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
if second_stage_mask_loss is not None:
mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
second_stage_mask_loss, name='mask_loss')
loss_dict[mask_loss.op.name] = mask_loss
loss_dict['Loss/BoxClassifierLoss/mask_loss'] = mask_loss
return loss_dict
def _get_mask_proposal_boxes_and_classes(
......
......@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......
......@@ -432,14 +432,9 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
return eval_metric_ops
def _check_mask_type_and_value(array_name, masks):
"""Checks whether mask dtype is uint8 and the values are either 0 or 1."""
if masks.dtype != np.uint8:
raise ValueError('{} must be of type np.uint8. Found {}.'.format(
array_name, masks.dtype))
if np.any(np.logical_and(masks != 0, masks != 1)):
raise ValueError('{} elements can only be either 0 or 1.'.format(
array_name))
def convert_masks_to_binary(masks):
"""Converts masks to 0 or 1 and uint8 type."""
return (masks > 0).astype(np.uint8)
class CocoKeypointEvaluator(CocoDetectionEvaluator):
......@@ -952,9 +947,8 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
groundtruth_instance_masks = groundtruth_dict[
standard_fields.InputDataFields.groundtruth_instance_masks]
_check_mask_type_and_value(standard_fields.InputDataFields.
groundtruth_instance_masks,
groundtruth_instance_masks)
groundtruth_instance_masks = convert_masks_to_binary(
groundtruth_instance_masks)
self._groundtruth_list.extend(
coco_tools.
ExportSingleImageGroundtruthToCoco(
......@@ -1013,9 +1007,7 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
'are incompatible: {} vs {}'.format(
groundtruth_masks_shape,
detection_masks.shape))
_check_mask_type_and_value(standard_fields.DetectionResultFields.
detection_masks,
detection_masks)
detection_masks = convert_masks_to_binary(detection_masks)
self._detection_masks_list.extend(
coco_tools.ExportSingleImageDetectionMasksToCoco(
image_id=image_id,
......
......@@ -1424,14 +1424,16 @@ class CocoMaskEvaluationTest(tf.test.TestCase):
image_id='image3',
detections_dict={
standard_fields.DetectionResultFields.detection_boxes:
np.array([[25., 25., 50., 50.]]),
np.array([[25., 25., 50., 50.]]),
standard_fields.DetectionResultFields.detection_scores:
np.array([.8]),
np.array([.8]),
standard_fields.DetectionResultFields.detection_classes:
np.array([1]),
np.array([1]),
standard_fields.DetectionResultFields.detection_masks:
np.pad(np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
# The value of 5 is equivalent to 1, since masks will be
# thresholded and binarized before evaluation.
np.pad(5 * np.ones([1, 25, 25], dtype=np.uint8),
((0, 0), (10, 10), (10, 10)), mode='constant')
})
metrics = coco_evaluator.evaluate()
self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
......
......@@ -136,15 +136,15 @@ def build_groundtruth_dictionary(data, class_label_map):
dictionary = {
standard_fields.InputDataFields.groundtruth_boxes:
data_location[['YMin', 'XMin', 'YMax', 'XMax']].as_matrix(),
data_location[['YMin', 'XMin', 'YMax', 'XMax']].to_numpy(),
standard_fields.InputDataFields.groundtruth_classes:
data_location['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
).to_numpy(),
standard_fields.InputDataFields.groundtruth_group_of:
data_location['IsGroupOf'].as_matrix().astype(int),
data_location['IsGroupOf'].to_numpy().astype(int),
standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x]
).as_matrix(),
).to_numpy(),
}
if 'Mask' in data_location:
......@@ -179,9 +179,9 @@ def build_predictions_dictionary(data, class_label_map):
"""
dictionary = {
standard_fields.DetectionResultFields.detection_classes:
data['LabelName'].map(lambda x: class_label_map[x]).as_matrix(),
data['LabelName'].map(lambda x: class_label_map[x]).to_numpy(),
standard_fields.DetectionResultFields.detection_scores:
data['Score'].as_matrix()
data['Score'].to_numpy()
}
if 'Mask' in data:
......@@ -192,6 +192,6 @@ def build_predictions_dictionary(data, class_label_map):
else:
dictionary[standard_fields.DetectionResultFields.detection_boxes] = data[[
'YMin', 'XMin', 'YMax', 'XMax'
]].as_matrix()
]].to_numpy()
return dictionary
......@@ -53,16 +53,16 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
'XMax1']].to_numpy()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['object'] = data_boxes['LabelName2'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x]).as_matrix()
lambda x: relationship_label_map[x]).to_numpy()
return {
standard_fields.InputDataFields.groundtruth_boxes:
......@@ -71,7 +71,7 @@ def build_groundtruth_vrd_dictionary(data, class_label_map,
labels,
standard_fields.InputDataFields.groundtruth_image_classes:
data_labels['LabelName'].map(lambda x: class_label_map[x])
.as_matrix(),
.to_numpy(),
}
......@@ -104,16 +104,16 @@ def build_predictions_vrd_dictionary(data, class_label_map,
boxes = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.vrd_box_data_type)
boxes['subject'] = data_boxes[['YMin1', 'XMin1', 'YMax1',
'XMax1']].as_matrix()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].as_matrix()
'XMax1']].to_numpy()
boxes['object'] = data_boxes[['YMin2', 'XMin2', 'YMax2', 'XMax2']].to_numpy()
labels = np.zeros(data_boxes.shape[0], dtype=vrd_evaluation.label_data_type)
labels['subject'] = data_boxes['LabelName1'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['object'] = data_boxes['LabelName2'].map(
lambda x: class_label_map[x]).as_matrix()
lambda x: class_label_map[x]).to_numpy()
labels['relation'] = data_boxes['RelationshipLabel'].map(
lambda x: relationship_label_map[x]).as_matrix()
lambda x: relationship_label_map[x]).to_numpy()
return {
standard_fields.DetectionResultFields.detection_boxes:
......@@ -121,5 +121,5 @@ def build_predictions_vrd_dictionary(data, class_label_map,
standard_fields.DetectionResultFields.detection_classes:
labels,
standard_fields.DetectionResultFields.detection_scores:
data_boxes['Score'].as_matrix()
data_boxes['Score'].to_numpy()
}
......@@ -43,7 +43,6 @@ from object_detection.utils import visualization_utils as vis_utils
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import learn as contrib_learn
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
......@@ -94,6 +93,15 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
of groundtruth boxes per image..
'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
tensor of keypoints (if provided in groundtruth).
'groundtruth_dp_num_points_list': [batch_size, num_boxes] int32 tensor
with the number of DensePose points for each instance (if provided in
groundtruth).
'groundtruth_dp_part_ids_list': [batch_size, num_boxes,
max_sampled_points] int32 tensor with the part ids for each DensePose
sampled point (if provided in groundtruth).
'groundtruth_dp_surface_coords_list': [batch_size, num_boxes,
max_sampled_points, 4] containing the DensePose surface coordinates for
each sampled point (if provided in groundtruth).
'groundtruth_group_of': [batch_size, num_boxes] bool tensor indicating
group_of annotations (if provided in groundtruth).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
......@@ -164,6 +172,21 @@ def _prepare_groundtruth_for_eval(detection_model, class_agnostic,
groundtruth[input_data_fields.groundtruth_labeled_classes] = tf.stack(
labeled_classes)
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_num_points):
groundtruth[input_data_fields.groundtruth_dp_num_points] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_num_points))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_part_ids):
groundtruth[input_data_fields.groundtruth_dp_part_ids] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_part_ids))
if detection_model.groundtruth_has_field(
fields.BoxListFields.densepose_surface_coords):
groundtruth[input_data_fields.groundtruth_dp_surface_coords] = tf.stack(
detection_model.groundtruth_lists(
fields.BoxListFields.densepose_surface_coords))
groundtruth[input_data_fields.num_groundtruth_boxes] = (
tf.tile([max_number_of_boxes], multiples=[groundtruth_boxes_shape[0]]))
return groundtruth
......@@ -219,6 +242,9 @@ def unstack_batch(tensor_dict, unpad_groundtruth_tensors=True):
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities,
fields.InputDataFields.groundtruth_dp_num_points,
fields.InputDataFields.groundtruth_dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords,
fields.InputDataFields.groundtruth_group_of,
fields.InputDataFields.groundtruth_difficult,
fields.InputDataFields.groundtruth_is_crowd,
......@@ -269,6 +295,18 @@ def provide_groundtruth(model, labels):
if fields.InputDataFields.groundtruth_keypoint_visibilities in labels:
gt_keypoint_visibilities_list = labels[
fields.InputDataFields.groundtruth_keypoint_visibilities]
gt_dp_num_points_list = None
if fields.InputDataFields.groundtruth_dp_num_points in labels:
gt_dp_num_points_list = labels[
fields.InputDataFields.groundtruth_dp_num_points]
gt_dp_part_ids_list = None
if fields.InputDataFields.groundtruth_dp_part_ids in labels:
gt_dp_part_ids_list = labels[
fields.InputDataFields.groundtruth_dp_part_ids]
gt_dp_surface_coords_list = None
if fields.InputDataFields.groundtruth_dp_surface_coords in labels:
gt_dp_surface_coords_list = labels[
fields.InputDataFields.groundtruth_dp_surface_coords]
gt_weights_list = None
if fields.InputDataFields.groundtruth_weights in labels:
gt_weights_list = labels[fields.InputDataFields.groundtruth_weights]
......@@ -297,13 +335,16 @@ def provide_groundtruth(model, labels):
groundtruth_masks_list=gt_masks_list,
groundtruth_keypoints_list=gt_keypoints_list,
groundtruth_keypoint_visibilities_list=gt_keypoint_visibilities_list,
groundtruth_dp_num_points_list=gt_dp_num_points_list,
groundtruth_dp_part_ids_list=gt_dp_part_ids_list,
groundtruth_dp_surface_coords_list=gt_dp_surface_coords_list,
groundtruth_weights_list=gt_weights_list,
groundtruth_is_crowd_list=gt_is_crowd_list,
groundtruth_group_of_list=gt_group_of_list,
groundtruth_area_list=gt_area_list)
def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
postprocess_on_cpu=False):
"""Creates a model function for `Estimator`.
......@@ -377,7 +418,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
side_inputs = detection_model.get_side_inputs(features)
if use_tpu and train_config.use_bfloat16:
with contrib_tpu.bfloat16_scope():
with tf.tpu.bfloat16_scope():
prediction_dict = detection_model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape], **side_inputs)
......@@ -392,7 +433,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode in (tf.estimator.ModeKeys.EVAL, tf.estimator.ModeKeys.PREDICT):
if use_tpu and postprocess_on_cpu:
detections = contrib_tpu.outside_compilation(
detections = tf.tpu.outside_compilation(
postprocess_wrapper,
(prediction_dict,
features[fields.InputDataFields.true_image_shape]))
......@@ -468,7 +509,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
if mode == tf.estimator.ModeKeys.TRAIN:
if use_tpu:
training_optimizer = contrib_tpu.CrossShardOptimizer(training_optimizer)
training_optimizer = tf.tpu.CrossShardOptimizer(training_optimizer)
# Optionally freeze some layers by setting their gradients to be zero.
trainable_variables = None
......@@ -588,7 +629,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
# EVAL executes on CPU, so use regular non-TPU EstimatorSpec.
if use_tpu and mode != tf.estimator.ModeKeys.EVAL:
return contrib_tpu.TPUEstimatorSpec(
return tf.estimator.tpu.TPUEstimatorSpec(
mode=mode,
scaffold_fn=scaffold_fn,
predictions=detections,
......@@ -619,8 +660,8 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False,
def create_estimator_and_inputs(run_config,
hparams,
pipeline_config_path,
hparams=None,
pipeline_config_path=None,
config_override=None,
train_steps=None,
sample_1_of_n_eval_examples=1,
......@@ -639,7 +680,7 @@ def create_estimator_and_inputs(run_config,
Args:
run_config: A `RunConfig`.
hparams: A `HParams`.
hparams: (optional) A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
......@@ -762,14 +803,14 @@ def create_estimator_and_inputs(run_config,
model_config=model_config, predict_input_config=eval_input_configs[0])
# Read export_to_tpu from hparams if not passed.
if export_to_tpu is None:
if export_to_tpu is None and hparams is not None:
export_to_tpu = hparams.get('export_to_tpu', False)
tf.logging.info('create_estimator_and_inputs: use_tpu %s, export_to_tpu %s',
use_tpu, export_to_tpu)
model_fn = model_fn_creator(detection_model_fn, configs, hparams, use_tpu,
postprocess_on_cpu)
if use_tpu_estimator:
estimator = contrib_tpu.TPUEstimator(
estimator = tf.estimator.tpu.TPUEstimator(
model_fn=model_fn,
train_batch_size=train_config.batch_size,
# For each core, only batch size 1 is supported for eval.
......
......@@ -23,6 +23,7 @@ import os
import time
import tensorflow.compat.v1 as tf
import tensorflow.compat.v2 as tf2
from object_detection import eval_util
from object_detection import inputs
......@@ -93,6 +94,12 @@ def _compute_losses_and_predictions_dicts(
instance masks for objects.
labels[fields.InputDataFields.groundtruth_keypoints] is a
float32 tensor containing keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is an int32
tensor with the number of sampled DensePose points per object.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is an int32
tensor with the DensePose part ids (0-indexed) per object.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
float32 tensor with the DensePose surface coordinates.
labels[fields.InputDataFields.groundtruth_group_of] is a tf.bool tensor
containing group_of annotations.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
......@@ -111,7 +118,8 @@ def _compute_losses_and_predictions_dicts(
prediction_dict = model.predict(
preprocessed_images,
features[fields.InputDataFields.true_image_shape])
features[fields.InputDataFields.true_image_shape],
**model.get_side_inputs(features))
prediction_dict = ops.bfloat16_to_float32_nested(prediction_dict)
losses_dict = model.loss(
......@@ -195,6 +203,17 @@ def eager_train_step(detection_model,
labels[fields.InputDataFields.groundtruth_keypoints] is a
[batch_size, num_boxes, num_keypoints, 2] float32 tensor containing
keypoints for each box.
labels[fields.InputDataFields.groundtruth_dp_num_points] is a
[batch_size, num_boxes] int32 tensor with the number of DensePose
sampled points per instance.
labels[fields.InputDataFields.groundtruth_dp_part_ids] is a
[batch_size, num_boxes, max_sampled_points] int32 tensor with the
part ids (0-indexed) for each instance.
labels[fields.InputDataFields.groundtruth_dp_surface_coords] is a
[batch_size, num_boxes, max_sampled_points, 4] float32 tensor with the
surface coordinates for each point. Each surface coordinate is of the
form (y, x, v, u) where (y, x) are normalized image locations and
(v, u) are part-relative normalized surface coordinates.
labels[fields.InputDataFields.groundtruth_labeled_classes] is a float32
k-hot tensor of classes.
unpad_groundtruth_tensors: A parameter passed to unstack_batch.
......@@ -336,11 +355,18 @@ def load_fine_tune_checkpoint(
labels)
strategy = tf.compat.v2.distribute.get_strategy()
strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
if hasattr(tf.distribute.Strategy, 'run'):
strategy.run(
_dummy_computation_fn, args=(
features,
labels,
))
else:
strategy.experimental_run_v2(
_dummy_computation_fn, args=(
features,
labels,
))
restore_from_objects_dict = model.restore_from_objects(
fine_tune_checkpoint_type=checkpoint_type)
......@@ -391,6 +417,7 @@ def train_loop(
save_final_config=False,
checkpoint_every_n=1000,
checkpoint_max_to_keep=7,
record_summaries=True,
**kwargs):
"""Trains a model using eager + functions.
......@@ -420,6 +447,7 @@ def train_loop(
Checkpoint every n training steps.
checkpoint_max_to_keep:
int, the number of most recent checkpoints to keep in the model directory.
record_summaries: Boolean, whether or not to record summaries.
**kwargs: Additional keyword arguments for configuration override.
"""
## Parse the configs
......@@ -506,8 +534,11 @@ def train_loop(
# is the chief.
summary_writer_filepath = get_filepath(strategy,
os.path.join(model_dir, 'train'))
summary_writer = tf.compat.v2.summary.create_file_writer(
summary_writer_filepath)
if record_summaries:
summary_writer = tf.compat.v2.summary.create_file_writer(
summary_writer_filepath)
else:
summary_writer = tf2.summary.create_noop_writer()
if use_tpu:
num_steps_per_iteration = 100
......@@ -562,8 +593,12 @@ def train_loop(
def _sample_and_train(strategy, train_step_fn, data_iterator):
features, labels = data_iterator.next()
per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
if hasattr(tf.distribute.Strategy, 'run'):
per_replica_losses = strategy.run(
train_step_fn, args=(features, labels))
else:
per_replica_losses = strategy.experimental_run_v2(
train_step_fn, args=(features, labels))
# TODO(anjalisridhar): explore if it is safe to remove the
## num_replicas scaling of the loss and switch this to a ReduceOp.Mean
return strategy.reduce(tf.distribute.ReduceOp.SUM,
......@@ -575,7 +610,9 @@ def train_loop(
if num_steps_per_iteration > 1:
for _ in tf.range(num_steps_per_iteration - 1):
_sample_and_train(strategy, train_step_fn, data_iterator)
# Following suggestion on yaqs/5402607292645376
with tf.name_scope(''):
_sample_and_train(strategy, train_step_fn, data_iterator)
return _sample_and_train(strategy, train_step_fn, data_iterator)
......@@ -767,7 +804,16 @@ def eager_eval_loop(
name='eval_side_by_side_' + str(i),
step=global_step,
data=sbys_images,
max_outputs=1)
max_outputs=eval_config.num_visualizations)
if eval_util.has_densepose(eval_dict):
dp_image_list = vutils.draw_densepose_visualizations(
eval_dict)
dp_images = tf.concat(dp_image_list, axis=0)
tf.compat.v2.summary.image(
name='densepose_detections_' + str(i),
step=global_step,
data=dp_images,
max_outputs=eval_config.num_visualizations)
if evaluators is None:
if class_agnostic:
......@@ -817,6 +863,7 @@ def eval_continuously(
checkpoint_dir=None,
wait_interval=180,
timeout=3600,
eval_index=None,
**kwargs):
"""Run continuous evaluation of a detection model eagerly.
......@@ -846,6 +893,8 @@ def eval_continuously(
new checkpoint.
timeout: The maximum number of seconds to wait for a checkpoint. Execution
will terminate if no new checkpoints are found after these many seconds.
eval_index: int, optional If give, only evaluate the dataset at the given
index.
**kwargs: Additional keyword arguments for configuration override.
"""
......@@ -899,6 +948,11 @@ def eval_continuously(
model=detection_model)
eval_inputs.append((eval_input_config.name, next_eval_input))
if eval_index is not None:
eval_inputs = [eval_inputs[eval_index]]
tf.logging.info('eval_index selected - {}'.format(
eval_inputs))
global_step = tf.compat.v2.Variable(
0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
......
......@@ -22,7 +22,6 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
flags.DEFINE_string(
......@@ -41,10 +40,6 @@ flags.DEFINE_integer('sample_1_of_n_eval_on_train_examples', 5, 'Will sample '
'one of every n train input examples for evaluation, '
'where n is provided. This is only used if '
'`eval_training_data` is True.')
flags.DEFINE_string(
'hparams_overrides', None, 'Hyperparameter overrides, '
'represented as a string containing comma-separated '
'hparam_name=value pairs.')
flags.DEFINE_string(
'checkpoint_dir', None, 'Path to directory holding a checkpoint. If '
'`checkpoint_dir` is provided, this binary operates in eval-only mode, '
......@@ -68,7 +63,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
......@@ -54,10 +54,19 @@ flags.DEFINE_integer('eval_timeout', 3600, 'Number of seconds to wait for an'
'evaluation checkpoint before exiting.')
flags.DEFINE_bool('use_tpu', False, 'Whether the job is executing on a TPU.')
flags.DEFINE_string(
'tpu_name',
default=None,
help='Name of the Cloud TPU for Cluster Resolvers.')
flags.DEFINE_integer(
'num_workers', 1, 'When num_workers > 1, training uses '
'MultiWorkerMirroredStrategy. When num_workers = 1 it uses '
'MirroredStrategy.')
flags.DEFINE_integer(
'checkpoint_every_n', 1000, 'Integer defining how often we checkpoint.')
flags.DEFINE_boolean('record_summaries', True,
('Whether or not to record summaries during'
' training.'))
FLAGS = flags.FLAGS
......@@ -79,7 +88,10 @@ def main(unused_argv):
wait_interval=300, timeout=FLAGS.eval_timeout)
else:
if FLAGS.use_tpu:
resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
# TPU is automatically inferred if tpu_name is None and
# we are running under cloud ai-platform.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
FLAGS.tpu_name)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)
......@@ -93,7 +105,9 @@ def main(unused_argv):
pipeline_config_path=FLAGS.pipeline_config_path,
model_dir=FLAGS.model_dir,
train_steps=FLAGS.num_train_steps,
use_tpu=FLAGS.use_tpu)
use_tpu=FLAGS.use_tpu,
checkpoint_every_n=FLAGS.checkpoint_every_n,
record_summaries=FLAGS.record_summaries)
if __name__ == '__main__':
tf.compat.v1.app.run()
......@@ -26,18 +26,8 @@ from absl import flags
import tensorflow.compat.v1 as tf
from object_detection import model_hparams
from object_detection import model_lib
# pylint: disable=g-import-not-at-top
try:
from tensorflow.contrib import cluster_resolver as contrib_cluster_resolver
from tensorflow.contrib import tpu as contrib_tpu
except ImportError:
# TF 2.0 doesn't ship with contrib.
pass
# pylint: enable=g-import-not-at-top
tf.flags.DEFINE_bool('use_tpu', True, 'Use TPUs rather than plain CPUs')
# Cloud TPU Cluster Resolvers
......@@ -67,10 +57,6 @@ flags.DEFINE_string('mode', 'train',
flags.DEFINE_integer('train_batch_size', None, 'Batch size for training. If '
'this is not provided, batch size is read from training '
'config.')
flags.DEFINE_string(
'hparams_overrides', None, 'Comma-separated list of '
'hyperparameters to override defaults.')
flags.DEFINE_integer('num_train_steps', None, 'Number of train steps.')
flags.DEFINE_boolean('eval_training_data', False,
'If training data should be evaluated for this job.')
......@@ -99,15 +85,15 @@ def main(unused_argv):
flags.mark_flag_as_required('pipeline_config_path')
tpu_cluster_resolver = (
contrib_cluster_resolver.TPUClusterResolver(
tf.distribute.cluster_resolver.TPUClusterResolver(
tpu=[FLAGS.tpu_name], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
tpu_grpc_url = tpu_cluster_resolver.get_master()
config = contrib_tpu.RunConfig(
config = tf.estimator.tpu.RunConfig(
master=tpu_grpc_url,
evaluation_master=tpu_grpc_url,
model_dir=FLAGS.model_dir,
tpu_config=contrib_tpu.TPUConfig(
tpu_config=tf.estimator.tpu.TPUConfig(
iterations_per_loop=FLAGS.iterations_per_loop,
num_shards=FLAGS.num_shards))
......@@ -117,7 +103,6 @@ def main(unused_argv):
train_and_eval_dict = model_lib.create_estimator_and_inputs(
run_config=config,
hparams=model_hparams.create_hparams(FLAGS.hparams_overrides),
pipeline_config_path=FLAGS.pipeline_config_path,
train_steps=FLAGS.num_train_steps,
sample_1_of_n_eval_examples=FLAGS.sample_1_of_n_eval_examples,
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MobileNet V2[1] feature extractor for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1801.04381
[2]: https://arxiv.org/abs/1904.07850
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2
class CenterNetMobileNetV2FeatureExtractor(
center_net_meta_arch.CenterNetFeatureExtractor):
"""The MobileNet V2 feature extractor for CenterNet."""
def __init__(self,
mobilenet_v2_net,
channel_means=(0., 0., 0.),
channel_stds=(1., 1., 1.),
bgr_ordering=False):
"""Intializes the feature extractor.
Args:
mobilenet_v2_net: The underlying mobilenet_v2 network to use.
channel_means: A tuple of floats, denoting the mean of each channel
which will be subtracted from it.
channel_stds: A tuple of floats, denoting the standard deviation of each
channel. Each channel will be divided by its standard deviation value.
bgr_ordering: bool, if set will change the channel ordering to be in the
[blue, red, green] order.
"""
super(CenterNetMobileNetV2FeatureExtractor, self).__init__(
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
self._network = mobilenet_v2_net
output = self._network(self._network.input)
# TODO(nkhadke): Try out MobileNet+FPN next (skip connections are cheap and
# should help with performance).
# MobileNet by itself transforms a 224x224x3 volume into a 7x7x1280, which
# leads to a stride of 32. We perform upsampling to get it to a target
# stride of 4.
for num_filters in [256, 128, 64]:
# 1. We use a simple convolution instead of a deformable convolution
conv = tf.keras.layers.Conv2D(
filters=num_filters, kernel_size=1, strides=1, padding='same')
output = conv(output)
output = tf.keras.layers.BatchNormalization()(output)
output = tf.keras.layers.ReLU()(output)
# 2. We use the default initialization for the convolution layers
# instead of initializing it to do bilinear upsampling.
conv_transpose = tf.keras.layers.Conv2DTranspose(
filters=num_filters, kernel_size=3, strides=2, padding='same')
output = conv_transpose(output)
output = tf.keras.layers.BatchNormalization()(output)
output = tf.keras.layers.ReLU()(output)
self._network = tf.keras.models.Model(
inputs=self._network.input, outputs=output)
def preprocess(self, resized_inputs):
resized_inputs = super(CenterNetMobileNetV2FeatureExtractor,
self).preprocess(resized_inputs)
return tf.keras.applications.mobilenet_v2.preprocess_input(resized_inputs)
def load_feature_extractor_weights(self, path):
self._network.load_weights(path)
def get_base_model(self):
return self._network
def call(self, inputs):
return [self._network(inputs)]
@property
def out_stride(self):
"""The stride in the output image of the network."""
return 4
@property
def num_feature_outputs(self):
"""The number of feature outputs returned by the feature extractor."""
return 1
def get_model(self):
return self._network
def mobilenet_v2(channel_means, channel_stds, bgr_ordering):
"""The MobileNetV2 backbone for CenterNet."""
# We set 'is_training' to True for now.
network = mobilenetv2.mobilenet_v2(True, include_top=False)
return CenterNetMobileNetV2FeatureExtractor(
network,
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing mobilenet_v2 feature extractor for CenterNet."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetMobileNetV2FeatureExtractorTest(test_case.TestCase):
def test_center_net_mobilenet_v2_feature_extractor(self):
net = mobilenet_v2.mobilenet_v2(True, include_top=False)
model = center_net_mobilenet_v2_feature_extractor.CenterNetMobileNetV2FeatureExtractor(
net)
def graph_fn():
img = np.zeros((8, 224, 224, 3), dtype=np.float32)
processed_img = model.preprocess(img)
return model(processed_img)
outputs = self.execute(graph_fn, [])
self.assertEqual(outputs.shape, (8, 56, 56, 64))
if __name__ == '__main__':
tf.test.main()
......@@ -21,9 +21,14 @@
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
from object_detection.models.keras_models import resnet_v1
_RESNET_MODEL_OUTPUT_LAYERS = {
'resnet_v1_18': ['conv2_block2_out', 'conv3_block2_out',
'conv4_block2_out', 'conv5_block2_out'],
'resnet_v1_34': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block6_out', 'conv5_block3_out'],
'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block6_out', 'conv5_block3_out'],
'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
......@@ -69,6 +74,10 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
self._base_model = tf.keras.applications.ResNet50(weights=None)
elif resnet_type == 'resnet_v1_101':
self._base_model = tf.keras.applications.ResNet101(weights=None)
elif resnet_type == 'resnet_v1_18':
self._base_model = resnet_v1.resnet_v1_18(weights=None)
elif resnet_type == 'resnet_v1_34':
self._base_model = resnet_v1.resnet_v1_34(weights=None)
else:
raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
......@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
def resnet_v1_34_fpn(channel_means, channel_stds, bgr_ordering):
"""The ResNet v1 34 FPN feature extractor."""
return CenterNetResnetV1FpnFeatureExtractor(
resnet_type='resnet_v1_34',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering
)
def resnet_v1_18_fpn(channel_means, channel_stds, bgr_ordering):
"""The ResNet v1 18 FPN feature extractor."""
return CenterNetResnetV1FpnFeatureExtractor(
resnet_type='resnet_v1_18',
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
......@@ -31,6 +31,8 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
@parameterized.parameters(
{'resnet_type': 'resnet_v1_50'},
{'resnet_type': 'resnet_v1_101'},
{'resnet_type': 'resnet_v1_18'},
{'resnet_type': 'resnet_v1_34'},
)
def test_correct_output_size(self, resnet_type):
"""Verify that shape of features returned by the backbone is correct."""
......
......@@ -56,7 +56,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
the resnet_v1.resnet_v1_{50,101,152} models.
resnet_v1_base_model_name: model name under which to construct resnet v1.
first_stage_features_stride: See base class.
conv_hyperparameters: a `hyperparams_builder.KerasLayerHyperparams` object
conv_hyperparams: a `hyperparams_builder.KerasLayerHyperparams` object
containing convolution hyperparameters for the layers added on top of
the base feature extractor.
batch_norm_trainable: See base class.
......@@ -143,19 +143,21 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
with tf.name_scope('ResnetV1FPN'):
full_resnet_v1_model = self._resnet_v1_base_model(
batchnorm_training=self._train_batch_norm,
conv_hyperparams=(self._conv_hyperparams
if self._override_base_feature_extractor_hyperparams
conv_hyperparams=(self._conv_hyperparams if
self._override_base_feature_extractor_hyperparams
else None),
classes=None,
weights=None,
include_top=False)
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[self._resnet_v1_base_model_name]
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[
self._resnet_v1_base_model_name]
outputs = [full_resnet_v1_model.get_layer(output_layer_name).output
for output_layer_name in output_layers]
self.classification_backbone = tf.keras.Model(
inputs=full_resnet_v1_model.inputs,
outputs=outputs)
backbone_outputs = self.classification_backbone(full_resnet_v1_model.inputs)
backbone_outputs = self.classification_backbone(
full_resnet_v1_model.inputs)
# construct FPN feature generator
self._base_fpn_max_level = min(self._fpn_max_level, 5)
......@@ -236,7 +238,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractor(
"""
with tf.name_scope(name):
with tf.name_scope('ResnetV1FPN'):
# TODO: Add a batchnorm layer between two fc layers.
# TODO(yiming): Add a batchnorm layer between two fc layers.
feature_extractor_model = tf.keras.models.Sequential([
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(units=1024, activation='relu'),
......@@ -283,12 +285,15 @@ class FasterRCNNResnet50FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams
)
class FasterRCNNResnet101FpnKerasFeatureExtractor(
FasterRCNNResnetV1FpnKerasFeatureExtractor):
"""Faster RCNN with Resnet101 FPN feature extractor."""
def __init__(self,
is_training,
first_stage_features_stride=16,
......@@ -323,7 +328,8 @@ class FasterRCNNResnet101FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
class FasterRCNNResnet152FpnKerasFeatureExtractor(
......@@ -364,4 +370,5 @@ class FasterRCNNResnet152FpnKerasFeatureExtractor(
fpn_min_level=fpn_min_level,
fpn_max_level=fpn_max_level,
additional_layer_depth=additional_layer_depth,
override_base_feature_extractor_hyperparams=override_base_feature_extractor_hyperparams)
override_base_feature_extractor_hyperparams=
override_base_feature_extractor_hyperparams)
......@@ -21,8 +21,8 @@ from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models import faster_rcnn_resnet_v1_fpn_keras_feature_extractor as frcnn_res_fpn
from object_detection.utils import tf_version
from object_detection.protos import hyperparams_pb2
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
......@@ -40,7 +40,7 @@ class FasterRCNNResnetV1FpnKerasFeatureExtractorTest(tf.test.TestCase):
}
}
"""
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
text_format.Parse(conv_hyperparams_text_proto, conv_hyperparams)
return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)
def _build_feature_extractor(self):
......
......@@ -21,6 +21,7 @@ from __future__ import print_function
import tensorflow.compat.v1 as tf
from tensorflow.python.keras.applications import resnet
from object_detection.core import freezable_batch_norm
from object_detection.models.keras_models import model_utils
......@@ -95,11 +96,11 @@ class _LayersOverride(object):
self.regularizer = tf.keras.regularizers.l2(weight_decay)
self.initializer = tf.variance_scaling_initializer()
def _FixedPaddingLayer(self, kernel_size, rate=1):
def _FixedPaddingLayer(self, kernel_size, rate=1): # pylint: disable=invalid-name
return tf.keras.layers.Lambda(
lambda x: _fixed_padding(x, kernel_size, rate))
def Conv2D(self, filters, kernel_size, **kwargs):
def Conv2D(self, filters, kernel_size, **kwargs): # pylint: disable=invalid-name
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras Resnet application's convolutions with ones that
......@@ -141,7 +142,7 @@ class _LayersOverride(object):
else:
return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs)
def Activation(self, *args, **kwargs): # pylint: disable=unused-argument
def Activation(self, *args, **kwargs): # pylint: disable=unused-argument,invalid-name
"""Builds an activation layer.
Overrides the Keras application Activation layer specified by the
......@@ -163,7 +164,7 @@ class _LayersOverride(object):
else:
return tf.keras.layers.Lambda(tf.nn.relu, name=name)
def BatchNormalization(self, **kwargs):
def BatchNormalization(self, **kwargs): # pylint: disable=invalid-name
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
......@@ -191,7 +192,7 @@ class _LayersOverride(object):
momentum=self._default_batchnorm_momentum,
**kwargs)
def Input(self, shape):
def Input(self, shape): # pylint: disable=invalid-name
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
......@@ -219,7 +220,7 @@ class _LayersOverride(object):
input=input_tensor, shape=[None] + shape)
return model_utils.input_layer(shape, placeholder_with_default)
def MaxPooling2D(self, pool_size, **kwargs):
def MaxPooling2D(self, pool_size, **kwargs): # pylint: disable=invalid-name
"""Builds a MaxPooling2D layer with default padding as 'SAME'.
This is specified by the default resnet arg_scope in slim.
......@@ -237,7 +238,7 @@ class _LayersOverride(object):
# Add alias as Keras also has it.
MaxPool2D = MaxPooling2D # pylint: disable=invalid-name
def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument
def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument,invalid-name
"""Replaces explicit padding in the Keras application with a no-op.
Args:
......@@ -395,3 +396,146 @@ def resnet_v1_152(batchnorm_training,
return tf.keras.applications.resnet.ResNet152(
layers=layers_override, **kwargs)
# pylint: enable=invalid-name
# The following codes are based on the existing keras ResNet model pattern:
# google3/third_party/tensorflow/python/keras/applications/resnet.py
def block_basic(x,
filters,
kernel_size=3,
stride=1,
conv_shortcut=False,
name=None):
"""A residual block for ResNet18/34.
Arguments:
x: input tensor.
filters: integer, filters of the bottleneck layer.
kernel_size: default 3, kernel size of the bottleneck layer.
stride: default 1, stride of the first layer.
conv_shortcut: default False, use convolution shortcut if True, otherwise
identity shortcut.
name: string, block label.
Returns:
Output tensor for the residual block.
"""
layers = tf.keras.layers
bn_axis = 3 if tf.keras.backend.image_data_format() == 'channels_last' else 1
preact = layers.BatchNormalization(
axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(
x)
preact = layers.Activation('relu', name=name + '_preact_relu')(preact)
if conv_shortcut:
shortcut = layers.Conv2D(
filters, 1, strides=1, name=name + '_0_conv')(
preact)
else:
shortcut = layers.MaxPooling2D(1, strides=stride)(x) if stride > 1 else x
x = layers.ZeroPadding2D(
padding=((1, 1), (1, 1)), name=name + '_1_pad')(
preact)
x = layers.Conv2D(
filters, kernel_size, strides=1, use_bias=False, name=name + '_1_conv')(
x)
x = layers.BatchNormalization(
axis=bn_axis, epsilon=1.001e-5, name=name + '_1_bn')(
x)
x = layers.Activation('relu', name=name + '_1_relu')(x)
x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x)
x = layers.Conv2D(
filters,
kernel_size,
strides=stride,
use_bias=False,
name=name + '_2_conv')(
x)
x = layers.BatchNormalization(
axis=bn_axis, epsilon=1.001e-5, name=name + '_2_bn')(
x)
x = layers.Activation('relu', name=name + '_2_relu')(x)
x = layers.Add(name=name + '_out')([shortcut, x])
return x
def stack_basic(x, filters, blocks, stride1=2, name=None):
"""A set of stacked residual blocks for ResNet18/34.
Arguments:
x: input tensor.
filters: integer, filters of the bottleneck layer in a block.
blocks: integer, blocks in the stacked blocks.
stride1: default 2, stride of the first layer in the first block.
name: string, stack label.
Returns:
Output tensor for the stacked blocks.
"""
x = block_basic(x, filters, conv_shortcut=True, name=name + '_block1')
for i in range(2, blocks):
x = block_basic(x, filters, name=name + '_block' + str(i))
x = block_basic(
x, filters, stride=stride1, name=name + '_block' + str(blocks))
return x
def resnet_v1_18(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000,
classifier_activation='softmax'):
"""Instantiates the ResNet18 architecture."""
def stack_fn(x):
x = stack_basic(x, 64, 2, stride1=1, name='conv2')
x = stack_basic(x, 128, 2, name='conv3')
x = stack_basic(x, 256, 2, name='conv4')
return stack_basic(x, 512, 2, name='conv5')
return resnet.ResNet(
stack_fn,
True,
True,
'resnet18',
include_top,
weights,
input_tensor,
input_shape,
pooling,
classes,
classifier_activation=classifier_activation)
def resnet_v1_34(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000,
classifier_activation='softmax'):
"""Instantiates the ResNet34 architecture."""
def stack_fn(x):
x = stack_basic(x, 64, 3, stride1=1, name='conv2')
x = stack_basic(x, 128, 4, name='conv3')
x = stack_basic(x, 256, 6, name='conv4')
return stack_basic(x, 512, 3, name='conv5')
return resnet.ResNet(
stack_fn,
True,
True,
'resnet34',
include_top,
weights,
input_tensor,
input_shape,
pooling,
classes,
classifier_activation=classifier_activation)
......@@ -20,12 +20,13 @@ object detection. To verify the consistency of the two models, we compare:
2. Number of global variables.
"""
import unittest
from absl.testing import parameterized
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_models import resnet_v1
from object_detection.protos import hyperparams_pb2
......@@ -180,5 +181,46 @@ class ResnetV1Test(test_case.TestCase):
self.assertEqual(len(variables), var_num)
class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
@parameterized.parameters(
{
'resnet_type':
'resnet_v1_34',
'output_layer_names': [
'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
'conv5_block3_out'
]
}, {
'resnet_type':
'resnet_v1_18',
'output_layer_names': [
'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
'conv5_block2_out'
]
})
def test_output_shapes(self, resnet_type, output_layer_names):
if resnet_type == 'resnet_v1_34':
model = resnet_v1.resnet_v1_34(weights=None)
else:
model = resnet_v1.resnet_v1_18(weights=None)
outputs = [
model.get_layer(output_layer_name).output
for output_layer_name in output_layer_names
]
resnet_model = tf.keras.models.Model(inputs=model.input, outputs=outputs)
outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32))
# Check the shape of 'conv2_block3_out':
self.assertEqual(outputs[0].shape, [2, 16, 16, 64])
# Check the shape of 'conv3_block4_out':
self.assertEqual(outputs[1].shape, [2, 8, 8, 128])
# Check the shape of 'conv4_block6_out':
self.assertEqual(outputs[2].shape, [2, 4, 4, 256])
# Check the shape of 'conv5_block3_out':
self.assertEqual(outputs[3].shape, [2, 2, 2, 512])
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment