Commit b92025a9 authored by anivegesana

Merge branch 'master' of https://github.com/tensorflow/models into detection_generator_pr_2

parents 1b425791 37536370
......@@ -261,12 +261,15 @@ class MaskRCNNTask(base_task.Task):
         metrics.append(tf.keras.metrics.Mean(name, dtype=tf.float32))
     else:
-      if self._task_config.annotation_file:
+      if (not self._task_config.model.include_mask
+         ) or self._task_config.annotation_file:
         self.coco_metric = coco_evaluator.COCOEvaluator(
             annotation_file=self._task_config.annotation_file,
             include_mask=self._task_config.model.include_mask,
             per_category_metrics=self._task_config.per_category_metrics)
       else:
+        # Builds COCO-style annotation file if include_mask is True, and
+        # annotation_file isn't provided.
         annotation_path = os.path.join(self._logging_dir, 'annotation.json')
         if tf.io.gfile.exists(annotation_path):
           logging.info(
......
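Read as a whole, the hunk above relaxes when the COCO evaluator is built eagerly: it is now constructed directly whenever masks are disabled or an annotation file is supplied, and the build-an-annotation-file path is reserved for mask models without one. A minimal sketch of the new control flow, assuming a `task_config` shaped like `self._task_config` (`build_coco_evaluator` is a hypothetical free function, not part of the diff):

import os

# Import path mirrors the task module; it may differ by release.
from official.vision.beta.evaluation import coco_evaluator

def build_coco_evaluator(task_config, logging_dir):
  if (not task_config.model.include_mask) or task_config.annotation_file:
    # Either masks are off or a COCO annotation file was given, so the
    # evaluator can be constructed immediately.
    return coco_evaluator.COCOEvaluator(
        annotation_file=task_config.annotation_file,
        include_mask=task_config.model.include_mask,
        per_category_metrics=task_config.per_category_metrics)
  # Mask model without an annotation file: a COCO-style annotation JSON is
  # generated under the logging directory first (that branch is elided above).
  annotation_path = os.path.join(logging_dir, 'annotation.json')
  ...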
......@@ -21,9 +21,11 @@ import tensorflow as tf
 class SpatialPyramidPooling(tf.keras.layers.Layer):
   """Implements the Atrous Spatial Pyramid Pooling.
 
-  Reference:
+  References:
     [Rethinking Atrous Convolution for Semantic Image Segmentation](
       https://arxiv.org/pdf/1706.05587.pdf)
+    [Encoder-Decoder with Atrous Separable Convolution for Semantic Image
+      Segmentation](https://arxiv.org/pdf/1802.02611.pdf)
   """
def __init__(
......@@ -39,6 +41,7 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
       kernel_initializer='glorot_uniform',
       kernel_regularizer=None,
       interpolation='bilinear',
+      use_depthwise_convolution=False,
       **kwargs):
     """Initializes `SpatialPyramidPooling`.
......@@ -60,6 +63,10 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
       kernel_regularizer: Kernel regularizer for conv layers. Defaults to None.
       interpolation: The interpolation method for upsampling. Defaults to
         `bilinear`.
+      use_depthwise_convolution: If True, the spatial pyramid branches use
+        depthwise separable convolutions, as in [Encoder-Decoder with Atrous
+        Separable Convolution for Semantic Image Segmentation](
+        https://arxiv.org/pdf/1802.02611.pdf).
       **kwargs: Other keyword arguments for the layer.
     """
     super(SpatialPyramidPooling, self).__init__(**kwargs)
super(SpatialPyramidPooling, self).__init__(**kwargs)
......@@ -76,6 +83,7 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
     self.interpolation = interpolation
     self.input_spec = tf.keras.layers.InputSpec(ndim=4)
     self.pool_kernel_size = pool_kernel_size
+    self.use_depthwise_convolution = use_depthwise_convolution
 
   def build(self, input_shape):
     height = input_shape[1]
......@@ -109,9 +117,20 @@ class SpatialPyramidPooling(tf.keras.layers.Layer):
       self.aspp_layers.append(conv_sequential)
 
     for dilation_rate in self.dilation_rates:
-      conv_sequential = tf.keras.Sequential([
+      leading_layers = []
+      kernel_size = (3, 3)
+      if self.use_depthwise_convolution:
+        leading_layers += [
+            tf.keras.layers.DepthwiseConv2D(
+                depth_multiplier=1, kernel_size=kernel_size,
+                padding='same', depthwise_regularizer=self.kernel_regularizer,
+                depthwise_initializer=self.kernel_initializer,
+                dilation_rate=dilation_rate, use_bias=False)
+        ]
+        kernel_size = (1, 1)
+      conv_sequential = tf.keras.Sequential(leading_layers + [
           tf.keras.layers.Conv2D(
-              filters=self.output_channels, kernel_size=(3, 3),
+              filters=self.output_channels, kernel_size=kernel_size,
               padding='same', kernel_regularizer=self.kernel_regularizer,
               kernel_initializer=self.kernel_initializer,
               dilation_rate=dilation_rate, use_bias=False),
......
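The net effect when `use_depthwise_convolution=True`: each dilated 3x3 convolution in the pyramid is factored into a dilated 3x3 depthwise convolution followed by a 1x1 pointwise projection, the decomposition used in the Deeplab v3+ paper referenced above. A stripped-down sketch of one branch (a hedged reconstruction; the batch-norm and activation layers that follow in the real Sequential are omitted):

import tensorflow as tf

def aspp_branch(output_channels, dilation_rate, use_depthwise_convolution):
  # One dilated branch of the pyramid, mirroring the loop body above.
  layers = []
  kernel_size = (3, 3)
  if use_depthwise_convolution:
    # Spatial filtering happens in the dilated depthwise conv; the 1x1
    # pointwise conv that follows mixes channels. Together they approximate
    # a dense dilated 3x3 conv with far fewer parameters.
    layers.append(
        tf.keras.layers.DepthwiseConv2D(
            kernel_size=kernel_size, depth_multiplier=1, padding='same',
            dilation_rate=dilation_rate, use_bias=False))
    kernel_size = (1, 1)
  layers.append(
      tf.keras.layers.Conv2D(
          filters=output_channels, kernel_size=kernel_size, padding='same',
          dilation_rate=dilation_rate, use_bias=False))
  return tf.keras.Sequential(layers)

# For example, aspp_branch(256, dilation_rate=6, use_depthwise_convolution=True)
# maps [N, H, W, C] features to [N, H, W, 256].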
......@@ -961,7 +961,8 @@ class CenterNetCenterHeatmapTargetAssigner(object):
                                          width,
                                          gt_boxes_list,
                                          gt_classes_list,
-                                         gt_weights_list=None):
+                                         gt_weights_list=None,
+                                         maximum_normalized_coordinate=1.1):
     """Computes the object center heatmap target.
 
     Args:
......@@ -977,6 +978,9 @@ class CenterNetCenterHeatmapTargetAssigner(object):
         in the gt_boxes_list.
       gt_weights_list: A list of float tensors with shape [num_boxes]
         representing the weight of each groundtruth detection box.
+      maximum_normalized_coordinate: Maximum coordinate value to be considered
+        as normalized, defaults to 1.1. This is used to check bounds when
+        converting normalized coordinates to absolute coordinates.
 
     Returns:
       heatmap: A Tensor of size [batch_size, output_height, output_width,
......@@ -1002,7 +1006,8 @@ class CenterNetCenterHeatmapTargetAssigner(object):
     boxes = box_list_ops.to_absolute_coordinates(
         boxes,
         tf.maximum(height // self._stride, 1),
-        tf.maximum(width // self._stride, 1))
+        tf.maximum(width // self._stride, 1),
+        maximum_normalized_coordinate=maximum_normalized_coordinate)
 
     # Get the box center coordinates. Each returned tensor has the shape of
     # [num_instances].
     (y_center, x_center, boxes_height,
......
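`maximum_normalized_coordinate` is forwarded to `box_list_ops.to_absolute_coordinates`, which (with range checking enabled) asserts that the largest incoming coordinate stays at or below this bound before scaling, catching boxes that were accidentally passed in absolute form; the slack above 1.0 tolerates boxes clipped just outside the image. Roughly, the check amounts to the following (a simplified sketch, not the library implementation):

import tensorflow as tf

def to_absolute_sketch(normalized_boxes, height, width,
                       maximum_normalized_coordinate=1.1):
  # Scales [N, 4] boxes in normalized [ymin, xmin, ymax, xmax] form to
  # absolute pixel coordinates, failing fast if the input does not look
  # normalized (e.g. absolute boxes were passed by mistake).
  box_maximum = tf.reduce_max(normalized_boxes)
  check = tf.debugging.assert_less_equal(
      box_maximum, maximum_normalized_coordinate,
      message='boxes do not appear to be normalized')
  with tf.control_dependencies([check]):
    scale = tf.cast(tf.stack([height, width, height, width]), tf.float32)
    return normalized_boxes * scale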
......@@ -2714,7 +2714,8 @@ class CenterNetMetaArch(model.DetectionModel):
     return target_assigners
 
   def _compute_object_center_loss(self, input_height, input_width,
-                                  object_center_predictions, per_pixel_weights):
+                                  object_center_predictions, per_pixel_weights,
+                                  maximum_normalized_coordinate=1.1):
     """Computes the object center loss.
 
     Args:
......@@ -2726,6 +2727,9 @@ class CenterNetMetaArch(model.DetectionModel):
       per_pixel_weights: A float tensor of shape [batch_size,
         out_height * out_width, 1] with 1s in locations where the spatial
         coordinates fall within the height and width in true_image_shapes.
+      maximum_normalized_coordinate: Maximum coordinate value to be considered
+        as normalized, defaults to 1.1. This is used to check bounds when
+        converting normalized coordinates to absolute coordinates.
 
     Returns:
       A float scalar tensor representing the object center loss per instance.
......@@ -2752,7 +2756,8 @@ class CenterNetMetaArch(model.DetectionModel):
           width=input_width,
           gt_classes_list=gt_classes_list,
           gt_keypoints_list=gt_keypoints_list,
-          gt_weights_list=gt_weights_list)
+          gt_weights_list=gt_weights_list,
+          maximum_normalized_coordinate=maximum_normalized_coordinate)
     else:
       gt_boxes_list = self.groundtruth_lists(fields.BoxListFields.boxes)
       heatmap_targets = assigner.assign_center_targets_from_boxes(
......@@ -2760,7 +2765,8 @@ class CenterNetMetaArch(model.DetectionModel):
           width=input_width,
           gt_boxes_list=gt_boxes_list,
           gt_classes_list=gt_classes_list,
-          gt_weights_list=gt_weights_list)
+          gt_weights_list=gt_weights_list,
+          maximum_normalized_coordinate=maximum_normalized_coordinate)
 
     flattened_heatmap_targets = _flatten_spatial_dimensions(heatmap_targets)
     num_boxes = _to_float32(get_num_instances_from_weights(gt_weights_list))
......@@ -3577,7 +3583,9 @@ class CenterNetMetaArch(model.DetectionModel):
     self._batched_prediction_tensor_names = predictions.keys()
     return predictions
 
-  def loss(self, prediction_dict, true_image_shapes, scope=None):
+  def loss(
+      self, prediction_dict, true_image_shapes, scope=None,
+      maximum_normalized_coordinate=1.1):
     """Computes scalar loss tensors with respect to provided groundtruth.
 
     This function implements the various CenterNet losses.
......@@ -3589,6 +3597,9 @@ class CenterNetMetaArch(model.DetectionModel):
         the form [height, width, channels] indicating the shapes of true images
         in the resized images, as resized images can be padded with zeros.
       scope: Optional scope name.
+      maximum_normalized_coordinate: Maximum coordinate value to be considered
+        as normalized, defaults to 1.1. This is used to check bounds when
+        converting normalized coordinates to absolute coordinates.
 
     Returns:
       A dictionary mapping the keys [
......@@ -3616,7 +3627,7 @@ class CenterNetMetaArch(model.DetectionModel):
     # TODO(vighneshb) Explore whether using floor here is safe.
     output_true_image_shapes = tf.ceil(
-        tf.to_float(true_image_shapes) / self._stride)
+        tf.cast(true_image_shapes, tf.float32) / self._stride)
     valid_anchor_weights = get_valid_anchor_weights_in_flattened_image(
         output_true_image_shapes, output_height, output_width)
     valid_anchor_weights = tf.expand_dims(valid_anchor_weights, 2)
......@@ -3625,7 +3636,8 @@ class CenterNetMetaArch(model.DetectionModel):
         object_center_predictions=prediction_dict[OBJECT_CENTER],
         input_height=input_height,
         input_width=input_width,
-        per_pixel_weights=valid_anchor_weights)
+        per_pixel_weights=valid_anchor_weights,
+        maximum_normalized_coordinate=maximum_normalized_coordinate)
 
     losses = {
         OBJECT_CENTER:
             self._center_params.object_center_loss_weight * object_center_loss
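Together with the assigner hunks above, this threads `maximum_normalized_coordinate` from the public `loss` entry point through `_compute_object_center_loss` to both target-assignment paths. A usage sketch (the shapes and the widened 1.2 bound are purely illustrative):

# prediction_dict comes from model.predict(); shapes are illustrative.
losses = model.loss(
    prediction_dict,
    true_image_shapes=tf.constant([[512, 512, 3]]),
    # Widen the bound past the 1.1 default, e.g. when groundtruth boxes
    # may legitimately extend slightly beyond the padded image edge.
    maximum_normalized_coordinate=1.2)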
......@@ -3742,10 +3754,20 @@ class CenterNetMetaArch(model.DetectionModel):
"""
object_center_prob = tf.nn.sigmoid(prediction_dict[OBJECT_CENTER][-1])
# Mask object centers by true_image_shape. [batch, h, w, 1]
object_center_mask = mask_from_true_image_shape(
_get_shape(object_center_prob, 4), true_image_shapes)
object_center_prob *= object_center_mask
if true_image_shapes is None:
# If true_image_shapes is not provided, we assume the whole image is valid
# and infer the true_image_shapes from the object_center_prob shape.
batch_size, strided_height, strided_width, _ = _get_shape(
object_center_prob, 4)
true_image_shapes = tf.stack(
[strided_height * self._stride, strided_width * self._stride,
tf.constant(len(self._feature_extractor._channel_means))]) # pylint: disable=protected-access
true_image_shapes = tf.stack([true_image_shapes] * batch_size, axis=0)
else:
# Mask object centers by true_image_shape. [batch, h, w, 1]
object_center_mask = mask_from_true_image_shape(
_get_shape(object_center_prob, 4), true_image_shapes)
object_center_prob *= object_center_mask
# Get x, y and channel indices corresponding to the top indices in the class
# center predictions.
......@@ -3755,8 +3777,8 @@ class CenterNetMetaArch(model.DetectionModel):
             k=self._center_params.max_box_predictions))
     multiclass_scores = tf.gather_nd(
         object_center_prob, tf.stack([y_indices, x_indices], -1), batch_dims=1)
-    num_detections = tf.reduce_sum(tf.to_int32(detection_scores > 0), axis=1)
+    num_detections = tf.reduce_sum(
+        tf.cast(detection_scores > 0, tf.int32), axis=1)
 
     postprocess_dict = {
         fields.DetectionResultFields.detection_scores: detection_scores,
         fields.DetectionResultFields.detection_multiclass_scores:
......
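`postprocess` can now run without `true_image_shapes`: passing None treats the whole (possibly padded) image as valid and reconstructs the shape tensor from the prediction itself, which helps export paths that cannot supply per-image shapes. A usage sketch (`prediction_dict` comes from `model.predict`; shapes are illustrative):

# With explicit shapes, scores outside each true image are masked to zero.
detections = model.postprocess(
    prediction_dict, true_image_shapes=tf.constant([[128, 128, 3]]))

# With None, every heatmap pixel counts, which is convenient when the
# model's inputs are never padded.
detections = model.postprocess(prediction_dict, None)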
......@@ -2056,10 +2056,11 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
                     cnma.TEMPORAL_OFFSET)])
 
   @parameterized.parameters(
-      {'target_class_id': 1},
-      {'target_class_id': 2},
+      {'target_class_id': 1, 'with_true_image_shape': True},
+      {'target_class_id': 2, 'with_true_image_shape': True},
+      {'target_class_id': 1, 'with_true_image_shape': False},
   )
-  def test_postprocess(self, target_class_id):
+  def test_postprocess(self, target_class_id, with_true_image_shape):
     """Test the postprocess function."""
     model = build_center_net_meta_arch()
     max_detection = model._center_params.max_box_predictions
......@@ -2140,8 +2141,11 @@ class CenterNetMetaArchTest(test_case.TestCase, parameterized.TestCase):
     }
 
     def graph_fn():
-      detections = model.postprocess(prediction_dict,
-                                     tf.constant([[128, 128, 3]]))
+      if with_true_image_shape:
+        detections = model.postprocess(prediction_dict,
+                                       tf.constant([[128, 128, 3]]))
+      else:
+        detections = model.postprocess(prediction_dict, None)
       return detections
 
     detections = self.execute_cpu(graph_fn, [])
......
......@@ -21,11 +21,7 @@ REQUIRED_PACKAGES = [
     'lvis',
     'scipy',
     'pandas',
-    # tensorflow 2.5.0 requires grpcio~=1.34.0.
-    # tf-models-official (which requires google-cloud-bigquery) ends
-    # up installing the latest grpcio, which causes problems later.
-    'google-cloud-bigquery==1.21.0',
-    'tf-models-official',
+    'tf-models-official>=2.5.1',
 ]
 
 setup(
setup(
......
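Any tf-models-official at or above 2.5.1 resolves its own grpcio requirement, so the google-cloud-bigquery pin described in the removed comment is no longer needed. A hedged way to sanity-check an existing environment against the new floor (`importlib.metadata` and `packaging` are outside this repository):

from importlib.metadata import version
from packaging.version import Version

# Raises AssertionError if the installed release predates the new minimum.
assert Version(version('tf-models-official')) >= Version('2.5.1')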
......@@ -948,7 +948,8 @@ def merge_boxes_with_multiple_labels(boxes,
 def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
-                                width_scale=None):
+                                width_scale=None,
+                                name='nearest_neighbor_upsampling'):
   """Nearest neighbor upsampling implementation.
 
   Nearest neighbor upsampling function that maps input tensor with shape
......@@ -965,6 +966,7 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
       option when provided overrides `scale` option.
     width_scale: An integer multiple to scale the width of input image. This
       option when provided overrides `scale` option.
+    name: A name for the operation (optional).
 
   Returns:
     data_up: A float32 tensor of size
       [batch, height_in*scale, width_in*scale, channels].
......@@ -976,13 +978,13 @@ def nearest_neighbor_upsampling(input_tensor, scale=None, height_scale=None,
   if not scale and (height_scale is None or width_scale is None):
     raise ValueError('Provide either `scale` or `height_scale` and'
                      ' `width_scale`.')
-  with tf.name_scope('nearest_neighbor_upsampling'):
+  with tf.name_scope(name):
     h_scale = scale if height_scale is None else height_scale
     w_scale = scale if width_scale is None else width_scale
     (batch_size, height, width,
      channels) = shape_utils.combined_static_and_dynamic_shape(input_tensor)
-    output_tensor = tf.stack([input_tensor] * w_scale, axis=3)
-    output_tensor = tf.stack([output_tensor] * h_scale, axis=2)
+    output_tensor = tf.stack([input_tensor] * w_scale, axis=3, name='w_stack')
+    output_tensor = tf.stack([output_tensor] * h_scale, axis=2, name='h_stack')
     return tf.reshape(output_tensor,
                       [batch_size, height * h_scale, width * w_scale, channels])
......
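With the new `name` argument, several upsampling ops can coexist in one graph under distinct scopes. A small usage sketch for the function above (the feature tensor is illustrative):

import tensorflow as tf

# [batch, height, width, channels] input; 2x nearest-neighbor upsampling.
features = tf.ones([1, 4, 4, 8])
p3_up = nearest_neighbor_upsampling(features, scale=2, name='p3_upsample')
p4_up = nearest_neighbor_upsampling(features, scale=2, name='p4_upsample')
# Both outputs have shape [1, 8, 8, 8]; each input pixel becomes a 2x2 block.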
......@@ -197,9 +197,10 @@
   },
   "outputs": [],
   "source": [
-    "# setup path\n",
+    "# setup path and install tf-slim\n",
     "import sys\n",
-    "sys.path.append('/content/models/research/slim')"
+    "sys.path.append('/content/models/research/slim')\n",
+    "!pip install tf_slim"
   ]
  },
  {
{
......@@ -228,8 +229,10 @@
"outputs": [],
"source": [
"import tensorflow.compat.v1 as tf\n",
"import tf_slim as slim\n",
"from nets.mobilenet import mobilenet_v2\n",
"\n",
"tf.compat.v1.disable_eager_execution()\n"
"tf.reset_default_graph()\n",
"\n",
"# For simplicity we just decode jpeg inside tensorflow.\n",
......@@ -244,7 +247,7 @@
"images = tf.image.resize_images(images, (224, 224))\n",
"\n",
"# Note: arg_scope is optional for inference.\n",
"with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):\n",
"with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):\n",
" logits, endpoints = mobilenet_v2.mobilenet(images)\n",
" \n",
"# Restore using exponential moving average since it produces (1.5-2%) higher \n",
......
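Unescaped from the notebook JSON, the updated cells amount to the plain Python below; `tf_slim` replaces the removed `tf.contrib.slim`. The jpeg-decoding lines the diff elides are replaced here with a hypothetical placeholder input:

import tensorflow.compat.v1 as tf
import tf_slim as slim
from nets.mobilenet import mobilenet_v2

tf.compat.v1.disable_eager_execution()
tf.reset_default_graph()

# Hypothetical stand-in for the notebook's decoded-jpeg batch.
images = tf.placeholder(tf.float32, (None, None, None, 3))
images = tf.image.resize_images(images, (224, 224))

# Note: arg_scope is optional for inference.
with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)):
  logits, endpoints = mobilenet_v2.mobilenet(images)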