Unverified Commit 5ffcc5b6 authored by Anirudh Vegesana's avatar Anirudh Vegesana Committed by GitHub
Browse files

Merge branch 'purdue-yolo' into detection_generator_pr

parents 0b81a843 76e0c014
......@@ -448,9 +448,8 @@ def create_model_fn(detection_model_fn, configs, hparams=None, use_tpu=False,
tf.keras.backend.set_learning_phase(is_training)
# Set policy for mixed-precision training with Keras-based models.
if use_tpu and train_config.use_bfloat16:
from tensorflow.python.keras.engine import base_layer_utils # pylint: disable=g-import-not-at-top
# Enable v2 behavior, as `mixed_bfloat16` is only supported in TF 2.0.
base_layer_utils.enable_v2_dtype_behavior()
tf.keras.layers.enable_v2_dtype_behavior()
tf2.keras.mixed_precision.set_global_policy('mixed_bfloat16')
detection_model = detection_model_fn(
is_training=is_training, add_summaries=(not use_tpu))
......
......@@ -125,14 +125,8 @@ class CenterNetMobileNetV2FPNFeatureExtractor(
self._base_model.load_weights(path)
@property
def supported_sub_model_types(self):
  """The sub-model types this feature extractor can hand out."""
  supported = ['classification']
  return supported
def get_sub_model(self, sub_model_type):
  """Returns the requested sub-model.

  Args:
    sub_model_type: A string identifying the sub model. Only
      'classification' is supported by this feature extractor.

  Returns:
    The classification backbone model.

  Raises:
    ValueError: If `sub_model_type` is not 'classification'.
  """
  if sub_model_type == 'classification':
    return self._base_model
  # Bug fix: the original constructed the ValueError but never raised it,
  # so unsupported types silently returned None.
  raise ValueError('Sub model type "{}" not supported.'.format(sub_model_type))
def classification_backbone(self):
  """Returns the underlying classification backbone model."""
  backbone = self._base_model
  return backbone
def call(self, inputs):
  """Runs the feature extractor model and wraps its output in a list."""
  features = self._feature_extractor_model(inputs)
  return [features]
......
"""Layer for Non-Local operation.
This is a building block which mimics self-attention in a feature map.
For more information, please see https://arxiv.org/pdf/1711.07971.pdf
"""
import tensorflow as tf
from object_detection.utils import shape_utils
class NonLocalBlock(tf.keras.layers.Layer):
  """A Non-local block.

  Implements the self-attention style operation from
  https://arxiv.org/pdf/1711.07971.pdf as a residual block: the weighted
  aggregation of values is projected back to the input channel count and
  added to the input.
  """

  def __init__(self, bottleneck_channels, pairwise_fn='dot', pool_size=None,
               add_coord_conv=False):
    """Constructor.

    Args:
      bottleneck_channels: The number of channels used to do pairwise
        comparisons at each feature location.
      pairwise_fn: The pairwise comparison function. Currently supports
        'dot' and 'embedded_softmax'.
      pool_size: The downsample size (achieved with max pool) used prior to
        doing pairwise comparisons. This does not affect the shape of the
        output tensor, but reduces computation. For a pool_size of 2,
        computation is dropped by a factor of 4. If None, no downsampling is
        performed.
      add_coord_conv: Concatenates a 2-channel feature map with normalized
        coordinates (in range [-1, 1]) to the input, prior to the
        non-local block.

    Raises:
      RuntimeError: If self._pairwise_fn is not one of "dot" or
        "embedded_softmax".
    """
    super().__init__()
    self._bottleneck_channels = bottleneck_channels
    self._add_coord_conv = add_coord_conv
    self._pool_size = pool_size
    if pairwise_fn not in ('dot', 'embedded_softmax'):
      raise RuntimeError('pairwise_fn must be one of "dot" or '
                         '"embedded_softmax"')
    self._pairwise_fn = pairwise_fn

  def build(self, input_shape):
    channels = input_shape[-1]
    # 1x1 convolutions that project the input into the bottleneck space for
    # the query/key/value triplet of the attention computation.
    self.queries_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.keys_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    self.values_conv = tf.keras.layers.Conv2D(
        filters=self._bottleneck_channels, kernel_size=1, use_bias=False,
        strides=1, padding='same')
    # Projects the aggregated values back to the input channel count so the
    # residual addition in call() is well-defined.
    self.expand_conv = tf.keras.layers.Conv2D(
        filters=channels, kernel_size=1, use_bias=False, strides=1,
        padding='same')
    # gamma_initializer='zeros' makes the residual branch contribute nothing
    # at the start of training, so the block is initially an identity map.
    self.batchnorm = tf.keras.layers.BatchNormalization(
        name='batchnorm', epsilon=1e-5, momentum=0.1, fused=True,
        beta_initializer='zeros', gamma_initializer='zeros')
    if self._pool_size:
      self.maxpool_keys = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))
      self.maxpool_values = tf.keras.layers.MaxPool2D(
          pool_size=(self._pool_size, self._pool_size))

  def call(self, inputs):
    """Applies a non-local block to an input feature map.

    Args:
      inputs: A [batch, height, width, channels] float32 input tensor.

    Returns:
      An output tensor of the same shape as the input.
    """
    batch, height, width, _ = shape_utils.combined_static_and_dynamic_shape(
        inputs)
    x = inputs
    if self._add_coord_conv:
      # Bug fix: use 'ij' indexing so both coordinate grids have shape
      # [height, width] and align with the feature map. The previous
      # default 'xy' meshgrid produced [width, height] grids, which only
      # worked (by coincidence) for square inputs. Values are identical to
      # the old behavior when height == width.
      coords_y, coords_x = tf.meshgrid(
          tf.linspace(-1., 1., height), tf.linspace(-1., 1., width),
          indexing='ij')
      coords = tf.stack([coords_y, coords_x], axis=-1)
      coords = tf.tile(coords[tf.newaxis, :, :, :],
                       multiples=[batch, 1, 1, 1])
      x = tf.concat([x, coords], axis=-1)
    # shape: [B, H, W, bottleneck_channels].
    queries = self.queries_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    keys = self.keys_conv(x)
    # shape: [B, H, W, bottleneck_channels].
    values = self.values_conv(x)
    keys_height, keys_width = height, width
    if self._pool_size:
      keys_height = height // self._pool_size
      keys_width = width // self._pool_size
      # shape: [B, H', W', bottleneck_channels].
      keys = self.maxpool_keys(keys)
      values = self.maxpool_values(values)
    # Produce pairwise scores.
    queries = tf.reshape(
        queries, [batch, height * width, self._bottleneck_channels])
    keys = tf.reshape(
        keys, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H*W, H'*W'].
    scores = tf.linalg.matmul(queries, keys, transpose_b=True)
    if self._pairwise_fn == 'dot':
      normalization = tf.cast(height * width, dtype=tf.float32)
      scores = (1./normalization) * scores
    elif self._pairwise_fn == 'embedded_softmax':
      scores = tf.nn.softmax(scores, axis=-1)
    # Multiply scores with values.
    # shape = [B, H'*W', bottleneck_channels].
    values = tf.reshape(
        values, [batch, keys_height * keys_width, self._bottleneck_channels])
    # shape = [B, H, W, bottleneck_channels].
    weighted_values = tf.linalg.matmul(scores, values)
    weighted_values = tf.reshape(
        weighted_values, [batch, height, width, self._bottleneck_channels])
    # Construct residual.
    expand = self.batchnorm(self.expand_conv(weighted_values))
    output = expand + inputs
    return output
"""Tests for google3.third_party.tensorflow_models.object_detection.models.keras_models.nonlocal_block."""
import unittest
from absl.testing import parameterized
import tensorflow as tf
from object_detection.models.keras_models import nonlocal_block
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class NonlocalTest(test_case.TestCase, parameterized.TestCase):
  """Smoke tests for NonLocalBlock across its configuration options."""

  @parameterized.parameters(
      [{'pool_size': None, 'add_coord_conv': False},
       {'pool_size': None, 'add_coord_conv': True},
       {'pool_size': 2, 'add_coord_conv': False},
       {'pool_size': 2, 'add_coord_conv': True}])
  def test_run_nonlocal_block(self, pool_size, add_coord_conv):
    block_layer = nonlocal_block.NonLocalBlock(
        8, pool_size=pool_size, add_coord_conv=add_coord_conv)

    def _forward():
      zeros = tf.zeros((4, 16, 16, 32), dtype=tf.float32)
      return block_layer(zeros)

    result = self.execute(_forward, [])
    # The non-local block must preserve the input shape.
    self.assertAllEqual([4, 16, 16, 32], result.shape)
# Standard test entry point; tf.test.main() discovers and runs the cases above.
if __name__ == '__main__':
  tf.test.main()
......@@ -19,7 +19,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from keras.applications import resnet
from tensorflow.python.keras.applications import resnet
import tensorflow.compat.v1 as tf
......
......@@ -21,7 +21,11 @@ REQUIRED_PACKAGES = [
'lvis',
'scipy',
'pandas',
'tf-models-official'
# tensorflow 2.5.0 requires grpcio~=1.34.0.
# tf-models-official (which requires google-cloud-bigquery) ends
# up installing the latest grpcio which causes problems later.
'google-cloud-bigquery==1.21.0',
'tf-models-official',
]
setup(
......
......@@ -446,6 +446,10 @@ message CenterNet {
// The mode for jittering box ROIs. See RandomJitterBoxes in
// preprocessor.proto for more details
optional RandomJitterBoxes.JitterMode jitter_mode = 15 [default=DEFAULT];
// Weight for the box consistency loss as described in the BoxInst paper
// https://arxiv.org/abs/2012.02310
optional float box_consistency_loss_weight = 16 [default=0.0];
}
optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
......
......@@ -639,6 +639,12 @@ def _maybe_update_config_with_key_value(configs, key, value):
_update_rescore_instances(configs["model"], value)
elif field_name == "unmatched_keypoint_score":
_update_unmatched_keypoint_score(configs["model"], value)
elif field_name == "score_distance_multiplier":
_update_score_distance_multiplier(configs["model"], value)
elif field_name == "std_dev_multiplier":
_update_std_dev_multiplier(configs["model"], value)
elif field_name == "rescoring_threshold":
_update_rescoring_threshold(configs["model"], value)
else:
return False
return True
......@@ -1135,10 +1141,12 @@ def _update_candidate_search_scale(model_config, search_scale):
def _update_candidate_ranking_mode(model_config, mode):
"""Updates how keypoints are snapped to candidates in CenterNet."""
if mode not in ("min_distance", "score_distance_ratio"):
if mode not in ("min_distance", "score_distance_ratio",
"score_scaled_distance_ratio", "gaussian_weighted"):
raise ValueError("Attempting to set the keypoint candidate ranking mode "
"to {}, but the only options are 'min_distance' and "
"'score_distance_ratio'.".format(mode))
"to {}, but the only options are 'min_distance', "
"'score_distance_ratio', 'score_scaled_distance_ratio', "
"'gaussian_weighted'.".format(mode))
meta_architecture = model_config.WhichOneof("model")
if meta_architecture == "center_net":
if len(model_config.center_net.keypoint_estimation_task) == 1:
......@@ -1214,3 +1222,50 @@ def _update_unmatched_keypoint_score(model_config, score):
"unmatched_keypoint_score since there are multiple "
"keypoint estimation tasks")
def _update_score_distance_multiplier(model_config, score_distance_multiplier):
  """Sets score_distance_multiplier on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    score_distance_multiplier: Float written into the single keypoint
      estimation task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].score_distance_multiplier = score_distance_multiplier
  else:
    tf.logging.warning("Ignoring config override key for "
                       "score_distance_multiplier since there are multiple "
                       "keypoint estimation tasks")
def _update_std_dev_multiplier(model_config, std_dev_multiplier):
  """Sets std_dev_multiplier on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    std_dev_multiplier: Float written into the single keypoint estimation
      task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].std_dev_multiplier = std_dev_multiplier
  else:
    tf.logging.warning("Ignoring config override key for "
                       "std_dev_multiplier since there are multiple "
                       "keypoint estimation tasks")
def _update_rescoring_threshold(model_config, rescoring_threshold):
  """Sets rescoring_threshold on the keypoint task. See CenterNet proto.

  Args:
    model_config: A model.proto message holding the model configuration.
    rescoring_threshold: Float written into the single keypoint estimation
      task; ignored (with a warning) when multiple tasks exist.

  Raises:
    ValueError: If the configured meta-architecture is not center_net.
  """
  meta_architecture = model_config.WhichOneof("model")
  if meta_architecture != "center_net":
    raise ValueError(
        "Unsupported meta_architecture type: %s" % meta_architecture)
  tasks = model_config.center_net.keypoint_estimation_task
  if len(tasks) == 1:
    tasks[0].rescoring_threshold = rescoring_threshold
  else:
    tf.logging.warning("Ignoring config override key for "
                       "rescoring_threshold since there are multiple "
                       "keypoint estimation tasks")
......@@ -70,7 +70,7 @@ def iou(boxes1, boxes2):
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise iou scores.
......@@ -92,7 +92,7 @@ def ioa(boxes1, boxes2):
Args:
boxes1: a numpy array with shape [N, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding N boxes.
boxes2: a numpy array with shape [M, 4] holding M boxes.
Returns:
a numpy array with shape [N, M] representing pairwise ioa scores.
......
......@@ -16,6 +16,8 @@
import tensorflow.compat.v1 as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops
from object_detection.utils import shape_utils
......@@ -290,7 +292,8 @@ def get_valid_keypoint_mask_for_class(keypoint_coordinates,
def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
weights=None):
weights=None,
boxes_scale=1.0):
"""Apply weights at pixel locations.
This function is used to generate the pixel weight mask (usually in the output
......@@ -332,6 +335,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
a value to apply in each box region. Note that if blackout=True for a
given box, the weight will be zero. If None, all weights are assumed to be
1.
boxes_scale: The amount to scale the height/width of the boxes before
constructing the blackout regions. This is often useful to guarantee that
the proper weight fully covers the object boxes/masks during supervision,
as shifting might occur during image resizing, network stride, etc.
Returns:
A float tensor with shape [height, width] where all values within the
......@@ -347,6 +354,10 @@ def blackout_pixel_weights_by_box_regions(height, width, boxes, blackout,
(y_grid, x_grid) = image_shape_to_grids(height, width)
y_grid = tf.expand_dims(y_grid, axis=0)
x_grid = tf.expand_dims(x_grid, axis=0)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.scale_height_width(
boxlist, y_scale=boxes_scale, x_scale=boxes_scale)
boxes = boxlist.get()
y_min = tf.expand_dims(boxes[:, 0:1], axis=-1)
x_min = tf.expand_dims(boxes[:, 1:2], axis=-1)
y_max = tf.expand_dims(boxes[:, 2:3], axis=-1)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment