Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
...@@ -448,9 +448,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False, ...@@ -448,9 +448,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
tf.keras.backend.set_learning_phase(is_training) tf.keras.backend.set_learning_phase(is_training)
# Set policy for mixed-precision training with Keras-based models. # Set policy for mixed-precision training with Keras-based models.
if use_tpu and train_config.use_bfloat16: if use_tpu and train_config.use_bfloat16:
from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top
# Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0. # Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
base_layer_utils.enable_v2_dtype_behavior() tf.keras.layers.enable_v2_dtype_behavior()
tf2.keras.mixed_precision.set_global_policy('mixed_bfloat16') tf2.keras.mixed_precision.set_global_policy('mixed_bfloat16')
detection_model = detection_model_fn( detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu)) is_training=is_training, add_summaries=(not use_tpu))
......
...@@ -125,14 +125,8 @@ class CenterNetMobileNetV2FPNFeatureExtractor( ...@@ -125,14 +125,8 @@ class CenterNetMobileNetV2FPNFeatureExtractor(
self._base_model.load_weights(path) self._base_model.load_weights(path)
@property @property
def supported_sub_model_types(self): def classification_backbone(self):
return ['classification'] return self._base_model
def get_sub_model(self, sub_model_type):
if sub_model_type == 'classification':
return self._base_model
else:
ValueError('Sub model type "{}" not supported.'.format(sub_model_type))
def call(self, inputs): def call(self, inputs):
return [self._feature_extractor_model(inputs)] return [self._feature_extractor_model(inputs)]
......
"""Layer for Non-Local operation.
This is a building block which mimics self-attention in a feature map.
For more information, please see https://arxiv.org/pdf/1711.07971.pdf
"""
import tensorflow as tf
from object_detection.utils import shape_utils
class NonLocalBlock(tf.keras.layers.Layer):
  """A Non-local block.

  Mimics self-attention on a feature map; see
  https://arxiv.org/pdf/1711.07971.pdf for details.
  """

  def __init__(self, bottleneck_channels, pairwise_fn='dot', pool_size=None,
               add_coord_conv=False):
    """Constructor.

    Args:
      bottleneck_channels: The number of channels used to do pairwise
        comparisons at each feature location.
      pairwise_fn: The pairwise comparison function. Currently supports
        'dot' and 'embedded_softmax'.
      pool_size: The downsample size (achieved with max pool) used prior to
        doing pairwise comparisons. This does not affect the shape of the output
        tensor, but reduces computation. For a pool_size of 2, computation is
        dropped by a factor of 4. If None, no downsampling is performed.
      add_coord_conv: Concatenates a 2-channel feature map with normalized
        coordinates (in range [-1, 1]) to the input, prior to the
        non-local block.

    Raises:
      RuntimeError: If self._pairwise_fn is not one of "dot" or
        "embedded_softmax".
    """
    super().__init__()
    self._bottleneck_channels = bottleneck_channels
    self._add_coord_conv = add_coord_conv
    self._pool_size = pool_size
    if pairwise_fn not in ('dot', 'embedded_softmax'):
      raise RuntimeError('pairwise_fn must be one of "dot" or '
                         '"embedded_softmax"')
    self._pairwise_fn = pairwise_fn

  def build(self, input_shape):
    channels = input_shape[-1]
    # 1x1 projections into the bottleneck space for queries/keys/values.
    self.queries_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.keys_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.values_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    # 1x1 projection back to the input channel count for the residual add.
    self.expand_conv = tf.keras.layers.Conv2D(
        filters=channels, kernel_size=1, use_bias=False, strides=1,
        padding='same')
    # gamma is zero-initialized so the block starts as an identity mapping.
    self.batchnorm = tf.keras.layers.BatchNormalization(
        name='batchnorm', epsilon=1e-5, momentum=0.1, fused=True,
        beta_initializer='zeros', gamma_initializer='zeros')
    if self._pool_size:
      self.maxpool_keys = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))
      self.maxpool_values = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))

  def call(self, inputs):
    """Applies a non-local block to an input feature map.

    Args:
      inputs: A [batch, height, width, channels] float32 input tensor.

    Returns:
      An output tensor of the same shape as the input.
    """
    batch, height, width, _ = shape_utils.combined_static_and_dynamic_shape(
        inputs)
    x = inputs
    if self._add_coord_conv:
      # indexing='ij' yields grids of shape [height, width]; the default 'xy'
      # indexing would yield [width, height] grids (and swap the axis each
      # coordinate varies along), breaking the concat below for non-square
      # feature maps. For square inputs the result is identical.
      coords_y, coords_x = tf.meshgrid(tf.linspace(-1., 1., height),
                                       tf.linspace(-1., 1., width),
                                       indexing='ij')
      coords = tf.stack([coords_y, coords_x], axis=-1)
      coords = tf.tile(coords[tf.newaxis, :, :, :],
                       multiples=[batch, 1, 1, 1])
      x = tf.concat([x, coords], axis=-1)
    # shape: [B, H, W, bottleneck_channels].
    queries = self.queries_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    keys = self.keys_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    values = self.values_conv(x)
    keys_height, keys_width = height, width
    if self._pool_size:
      keys_height = height // self._pool_size
      keys_width = width // self._pool_size
      # shape: [B, H', W', bottleneck_channels].
      keys = self.maxpool_keys(keys)
      values = self.maxpool_values(values)
    # Produce pairwise scores.
    queries = tf.reshape(
        queries, [batch, height * width, self._bottleneck_channels])
    keys = tf.reshape(
        keys, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H*W, H'*W'].
    scores = tf.linalg.matmul(queries, keys, transpose_b=True)
    if self._pairwise_fn == 'dot':
      normalization = tf.cast(height * width, dtype=tf.float32)
      scores = (1./normalization) * scores
    elif self._pairwise_fn == 'embedded_softmax':
      scores = tf.nn.softmax(scores, axis=-1)
    # Multiply scores with values.
    # shape = [B, H'*W', bottleneck_channels].
    values = tf.reshape(
        values, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H, W, bottleneck_channels].
    weighted_values = tf.linalg.matmul(scores, values)
    weighted_values = tf.reshape(
        weighted_values, [batch, height, width, self._bottleneck_channels])
    # Construct residual.
    expand = self.batchnorm(self.expand_conv(weighted_values))
    output = expand + inputs
    return output
"""Tests for google3.third_party.tensorflow_models.object_detection.models.keras_models.nonlocal_block."""
import unittest
from absl.testing import parameterized
import tensorflow as tf
from object_detection.models.keras_models import nonlocal_block
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class NonlocalTest(test_case.TestCase, parameterized.TestCase):
  """Smoke tests for NonLocalBlock across pooling/coord-conv configurations."""

  # Same four configurations as the cartesian product of pool_size in
  # (None, 2) and add_coord_conv in (False, True).
  @parameterized.parameters(
      [{'pool_size': ps, 'add_coord_conv': cc}
       for ps in (None, 2) for cc in (False, True)])
  def test_run_nonlocal_block(self, pool_size, add_coord_conv):
    """Verifies the block runs and preserves the input tensor shape."""
    block = nonlocal_block.NonLocalBlock(
        8, pool_size=pool_size, add_coord_conv=add_coord_conv)

    def graph_fn():
      features = tf.zeros((4, 16, 16, 32), dtype=tf.float32)
      return block(features)

    result = self.execute(graph_fn, [])
    self.assertAllEqual([4, 16, 16, 32], result.shape)
# Script entry point: run the tests above via the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()
...@@ -19,7 +19,7 @@ from __future__ import absolute_import ...@@ -19,7 +19,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from keras.applications import resnet from tensorflow.python.keras.applications import resnet
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
......
...@@ -21,7 +21,11 @@ REQUIRED_PACKAGES = [ ...@@ -21,7 +21,11 @@ REQUIRED_PACKAGES = [
'lvis', 'lvis',
'scipy', 'scipy',
'pandas', 'pandas',
'tf-models-official' # tensorflow 2.5.0 requires grpcio~=1.34.0.
# tf-models-official (which requires google-could-bigquery) ends
# up installing the latest grpcio which causes problems later.
'google-cloud-bigquery==1.21.0',
'tf-models-official',
] ]
setup( setup(
......
...@@ -446,6 +446,10 @@ message CenterNet { ...@@ -446,6 +446,10 @@ message CenterNet {
// The mode for jitterting box ROIs. See RandomJitterBoxes in // The mode for jitterting box ROIs. See RandomJitterBoxes in
// preprocessor.proto for more details // preprocessor.proto for more details
optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT]; optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT];
// Weight for the box consistency loss as described in the BoxInst paper
// https://arxiv.org/abs/2012.02310
optional float box_consistency_loss_weight = 16 [default=0.0];
} }
optional DeepMACMaskEstimation deepmac_mask_estimation = 14; optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
......
...@@ -639,6 +639,12 @@ def _maybe_update_config_with_key_value(configs, key, value): ...@@ -639,6 +639,12 @@ def _maybe_update_config_with_key_value(configs, key, value):
_update_rescore_instances(configs["model"], value) _update_rescore_instances(configs["model"], value)
elif field_name == "unmatched_keypoint_score": elif field_name == "unmatched_keypoint_score":
_update_unmatched_keypoint_score(configs["model"], value) _update_unmatched_keypoint_score(configs["model"], value)
elif field_name == "score_distance_multiplier":
_update_score_distance_multiplier(configs["model"], value)
elif field_name == "std_dev_multiplier":
_update_std_dev_multiplier(configs["model"], value)
elif field_name == "rescoring_threshold":
_update_rescoring_threshold(configs["model"], value)
else: else:
return False return False
return True return True
...@@ -1135,10 +1141,12 @@ def _update_candidate_search_scale(model_config, search_scale): ...@@ -1135,10 +1141,12 @@ def _update_candidate_search_scale(model_config, search_scale):
def _update_candidate_ranking_mode(model_config, mode): def _update_candidate_ranking_mode(model_config, mode):
"""Updates how keypoints are snapped to candidates in CenterNet.""" """Updates how keypoints are snapped to candidates in CenterNet."""
if mode not in ("min_distance", "score_distance_ratio"): if mode not in ("min_distance", "score_distance_ratio",
"score_scaled_distance_ratio", "gaussian_weighted"):
raise ValueError("Attempting to set the keypoint candidate ranking mode " raise ValueError("Attempting to set the keypoint candidate ranking mode "
"to {}, but the only options are 'min_distance' and " "to {}, but the only options are 'min_distance', "
"'score_distance_ratio'.".format(mode)) "'score_distance_ratio', 'score_scaled_distance_ratio', "
"'gaussian_weighted'.".format(mode))
meta_architecture = model_config.WhichOneof("model") meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net": if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1: if len(model_config.center_net.keypoint_estimation_task) == 1:
...@@ -1214,3 +1222,50 @@ def _update_unmatched_keypoint_score(model_config, score): ...@@ -1214,3 +1222,50 @@ def _update_unmatched_keypoint_score(model_config, score):
"unmatched_keypoint_score since there are multiple " "unmatched_keypoint_score since there are multiple "
"keypoint estimation tasks") "keypoint estimation tasks")
def _update_score_distance_multiplier(model_config, score_distance_multiplier):
"""Updates the keypoint candidate selection metric. See CenterNet proto."""
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1:
kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0]
kpt_estimation_task.score_distance_multiplier = score_distance_multiplier
else:
tf.logging.warning("Ignoring config override key for "
"score_distance_multiplier since there are multiple "
"keypoint estimation tasks")
else:
raise ValueError(
"Unsupported meta_architecture type: %s" % meta_architecture)
def _update_std_dev_multiplier(model_config, std_dev_multiplier):
"""Updates the keypoint candidate selection metric. See CenterNet proto."""
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1:
kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0]
kpt_estimation_task.std_dev_multiplier = std_dev_multiplier
else:
tf.logging.warning("Ignoring config override key for "
"std_dev_multiplier since there are multiple "
"keypoint estimation tasks")
else:
raise ValueError(
"Unsupported meta_architecture type: %s" % meta_architecture)
def _update_rescoring_threshold(model_config, rescoring_threshold):
"""Updates the keypoint candidate selection metric. See CenterNet proto."""
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1:
kpt_estimation_task = model_config.center_net.keypoint_estimation_task[0]
kpt_estimation_task.rescoring_threshold = rescoring_threshold
else:
tf.logging.warning("Ignoring config override key for "
"rescoring_threshold since there are multiple "
"keypoint estimation tasks")
else:
raise ValueError(
"Unsupported meta_architecture type: %s" % meta_architecture)
...@@ -70,7 +70,7 @@ def iou(boxes1, boxes2): ...@@ -70,7 +70,7 @@ def iou(boxes1, boxes2):
Args: Args:
boxes1: a numpy array with shape [N, 4] holding N boxes. boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes. boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns: Returns:
a numpy array with shape [N, M] representing pairwise iou scores. a numpy array with shape [N, M] representing pairwise iou scores.
...@@ -92,7 +92,7 @@ def ioa(boxes1, boxes2): ...@@ -92,7 +92,7 @@ def ioa(boxes1, boxes2):
Args: Args:
boxes1: a numpy array with shape [N, 4] holding N boxes. boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes. boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns: Returns:
a numpy array with shape [N, M] representing pairwise ioa scores. a numpy array with shape [N, M] representing pairwise ioa scores.
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
import tensorflow.compat.v1 as tf import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
...@@ -290,7 +292,8 @@ def get_valid_keypoint_mask_for_class(keypoint_coordinates, ...@@ -290,7 +292,8 @@ def get_valid_keypoint_mask_for_class(keypoint_coordinates,
def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout, def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
weights=None): weights=None,
boxes_scale=1.0):
"""Apply weights at pixel locations. """Apply weights at pixel locations.
This function is used to generate the pixel weight mask (usually in the output This function is used to generate the pixel weight mask (usually in the output
...@@ -332,6 +335,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout, ...@@ -332,6 +335,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
a value to apply in each box region. Note that if blackout=True for a a value to apply in each box region. Note that if blackout=True for a
given box, the weight will be zero. If None, all weights are assumed to be given box, the weight will be zero. If None, all weights are assumed to be
1. 1.
boxes_scale: The amount to scale the height/width of the boxes before
constructing the blackout regions. This is often useful to guarantee that
the proper weight fully covers the object boxes/masks during supervision,
as shifting might occur during image resizing, network stride, etc.
Returns: Returns:
A float tensor with shape [height, width] where all values within the A float tensor with shape [height, width] where all values within the
...@@ -347,6 +354,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout, ...@@ -347,6 +354,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
(y_grid, x_grid) = image_shape_to_grids(height, width) (y_grid, x_grid) = image_shape_to_grids(height, width)
y_grid = tf.expand_dims(y_grid, axis=0) y_grid = tf.expand_dims(y_grid, axis=0)
x_grid = tf.expand_dims(x_grid, axis=0) x_grid = tf.expand_dims(x_grid, axis=0)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.scale_height_width(
boxlist, y_scale=boxes_scale, x_scale=boxes_scale)
boxes = boxlist.get()
y_min = tf.expand_dims(boxes[:, 0:1], axis=-1) y_min = tf.expand_dims(boxes[:, 0:1], axis=-1)
x_min = tf.expand_dims(boxes[:, 1:2], axis=-1) x_min = tf.expand_dims(boxes[:, 1:2], axis=-1)
y_max = tf.expand_dims(boxes[:, 2:3], axis=-1) y_max = tf.expand_dims(boxes[:, 2:3], axis=-1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment