Commit afd5579f authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into context_tf2

parents dcd96e02 567bd18d
......@@ -36,7 +36,7 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
Returns:
image: image which is the same shape as input image.
"""
with tf.name_scope('NormalizeImage', values=[image]):
with tf.compat.v1.name_scope('NormalizeImage', values=[image]):
original_minval = float(original_minval)
original_maxval = float(original_maxval)
target_minval = float(target_minval)
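Editorial note on the rewrite pattern in this hunk (and most hunks below): symbols removed from the top-level TF2 namespace are reached through tf.compat.v1, and positional arguments are switched to the TF2 keyword names. A minimal, self-contained sketch of that pattern, assuming nothing beyond stock TensorFlow 2.x (the snippet is illustrative, not the file's actual code):

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  image = tf.compat.v1.placeholder(tf.float32, [None, 32, 32, 3])
  # tf.name_scope(..., values=...) is v1-only, hence the compat.v1 rewrite.
  with tf.compat.v1.name_scope('NormalizeImage', values=[image]):
    normalized = (image - 0.0) / 255.0  # maps [0, 255] to [0, 1]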
......@@ -68,16 +68,17 @@ def generate_tfexample_image(input_example_strings,
A tensor with shape [batch_size, height, width, channels] of type float32
with values in the range [0..1]
"""
batch_size = tf.shape(input_example_strings)[0]
batch_size = tf.shape(input=input_example_strings)[0]
images_shape = tf.stack(
[batch_size, image_height, image_width, image_channels])
tf_example_image_key = 'image/encoded'
feature_configs = {
tf_example_image_key:
tf.FixedLenFeature(
tf.io.FixedLenFeature(
image_height * image_width * image_channels, dtype=tf.float32)
}
feature_tensors = tf.parse_example(input_example_strings, feature_configs)
feature_tensors = tf.io.parse_example(
serialized=input_example_strings, features=feature_configs)
float_images = tf.reshape(
normalize_image(
feature_tensors[tf_example_image_key],
......@@ -97,11 +98,11 @@ def attention_ocr_attention_masks(num_characters):
names = ['%s/Softmax:0' % (prefix)]
for i in range(1, num_characters):
names += ['%s_%d/Softmax:0' % (prefix, i)]
return [tf.get_default_graph().get_tensor_by_name(n) for n in names]
return [tf.compat.v1.get_default_graph().get_tensor_by_name(n) for n in names]
def build_tensor_info(tensor_dict):
return {
k: tf.saved_model.utils.build_tensor_info(t)
k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
for k, t in tensor_dict.items()
}
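A hedged sketch of how build_tensor_info is typically consumed when assembling a v1-style serving signature; the placeholder shapes and names below are illustrative assumptions, not taken from the export script:

import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  images = tf.compat.v1.placeholder(tf.float32, [None, 150, 600, 3],
                                    name='images')
  chars = tf.argmax(input=tf.zeros([1, 37, 134]), axis=2,
                    name='predicted_chars')
  signature = tf.compat.v1.saved_model.signature_def_utils.build_signature_def(
      inputs=build_tensor_info({'images': images}),
      outputs=build_tensor_info({'predictions': chars}),
      method_name=tf.saved_model.PREDICT_METHOD_NAME)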
......@@ -29,7 +29,7 @@ _CHECKPOINT_URL = (
def _clean_up():
tf.gfile.DeleteRecursively(tf.test.get_temp_dir())
tf.io.gfile.rmtree(tf.compat.v1.test.get_temp_dir())
def _create_tf_example_string(image):
......@@ -47,7 +47,7 @@ class AttentionOcrExportTest(tf.test.TestCase):
for suffix in ['.meta', '.index', '.data-00000-of-00001']:
filename = _CHECKPOINT + suffix
self.assertTrue(
tf.gfile.Exists(filename),
tf.io.gfile.exists(filename),
msg='Missing checkpoint file %s. '
'Please download and extract it from %s' %
(filename, _CHECKPOINT_URL))
......@@ -57,7 +57,8 @@ class AttentionOcrExportTest(tf.test.TestCase):
os.path.dirname(__file__), 'datasets/testdata/fsns')
tf.test.TestCase.setUp(self)
_clean_up()
self.export_dir = os.path.join(tf.test.get_temp_dir(), 'exported_model')
self.export_dir = os.path.join(
tf.compat.v1.test.get_temp_dir(), 'exported_model')
self.minimal_output_signature = {
'predictions': 'AttentionOcr_v1/predicted_chars:0',
'scores': 'AttentionOcr_v1/predicted_scores:0',
......@@ -93,10 +94,10 @@ class AttentionOcrExportTest(tf.test.TestCase):
size=self.dataset.image_shape).astype('uint8'),
}
signature_def = graph_def.signature_def[
tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
if serving:
input_name = signature_def.inputs[
tf.saved_model.signature_constants.CLASSIFY_INPUTS].name
tf.saved_model.CLASSIFY_INPUTS].name
# Model for serving takes input: inputs['inputs'] = 'tf_example:0'
feed_dict = {
input_name: [
......@@ -126,11 +127,11 @@ class AttentionOcrExportTest(tf.test.TestCase):
export_for_serving: True if the model was exported for Serving. This
affects how input is fed into the model.
"""
tf.reset_default_graph()
sess = tf.Session()
graph_def = tf.saved_model.loader.load(
tf.compat.v1.reset_default_graph()
sess = tf.compat.v1.Session()
graph_def = tf.compat.v1.saved_model.loader.load(
sess=sess,
tags=[tf.saved_model.tag_constants.SERVING],
tags=[tf.saved_model.SERVING],
export_dir=self.export_dir)
feed_dict = self.create_input_feed(graph_def, export_for_serving)
results = sess.run(self.minimal_output_signature, feed_dict=feed_dict)
......
......@@ -52,7 +52,7 @@ class ModelTest(tf.test.TestCase):
self.num_char_classes)
self.length_logit_shape = (self.batch_size, self.seq_length + 1)
# Placeholder knows image dimensions, but not batch size.
self.input_images = tf.placeholder(
self.input_images = tf.compat.v1.placeholder(
tf.float32,
shape=(None, self.image_height, self.image_width, 3),
name='input_node')
......@@ -89,8 +89,8 @@ class ModelTest(tf.test.TestCase):
with self.test_session() as sess:
endpoints_tf = ocr_model.create_base(
images=self.input_images, labels_one_hot=None)
sess.run(tf.global_variables_initializer())
tf.tables_initializer().run()
sess.run(tf.compat.v1.global_variables_initializer())
tf.compat.v1.tables_initializer().run()
endpoints = sess.run(
endpoints_tf, feed_dict={self.input_images: self.fake_images})
......@@ -127,7 +127,7 @@ class ModelTest(tf.test.TestCase):
ocr_model = self.create_model()
conv_tower = ocr_model.conv_tower_fn(self.input_images)
sess.run(tf.global_variables_initializer())
sess.run(tf.compat.v1.global_variables_initializer())
conv_tower_np = sess.run(
conv_tower, feed_dict={self.input_images: self.fake_images})
......@@ -141,9 +141,9 @@ class ModelTest(tf.test.TestCase):
ocr_model = self.create_model()
ocr_model.create_base(images=self.input_images, labels_one_hot=None)
with self.test_session() as sess:
tfprof_root = tf.profiler.profile(
tfprof_root = tf.compat.v1.profiler.profile(
sess.graph,
options=tf.profiler.ProfileOptionBuilder
options=tf.compat.v1.profiler.ProfileOptionBuilder
.trainable_variables_parameter())
model_size_bytes = 4 * tfprof_root.total_parameters
......@@ -163,9 +163,9 @@ class ModelTest(tf.test.TestCase):
summaries = ocr_model.create_summaries(
data, endpoints, charset, is_training=False)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
tf.tables_initializer().run()
sess.run(tf.compat.v1.global_variables_initializer())
sess.run(tf.compat.v1.local_variables_initializer())
tf.compat.v1.tables_initializer().run()
sess.run(summaries) # just check it is runnable
def test_sequence_loss_function_without_label_smoothing(self):
......@@ -188,7 +188,7 @@ class ModelTest(tf.test.TestCase):
Returns:
a list of tensors with encoded image coordinates in them.
"""
batch_size = tf.shape(net)[0]
batch_size = tf.shape(input=net)[0]
_, h, w, _ = net.shape.as_list()
h_loc = [
tf.tile(
......@@ -200,7 +200,8 @@ class ModelTest(tf.test.TestCase):
h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
w_loc = [
tf.tile(
tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
tf.contrib.layers.one_hot_encoding(
tf.constant([i]), num_classes=w),
[h, 1]) for i in range(w)
]
w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
......@@ -272,8 +273,8 @@ class ModelTest(tf.test.TestCase):
endpoints_tf = ocr_model.create_base(
images=self.fake_images, labels_one_hot=None)
sess.run(tf.global_variables_initializer())
tf.tables_initializer().run()
sess.run(tf.compat.v1.global_variables_initializer())
tf.compat.v1.tables_initializer().run()
endpoints = sess.run(endpoints_tf)
self.assertEqual(endpoints.predicted_text.shape, (self.batch_size,))
......@@ -289,7 +290,7 @@ class CharsetMapperTest(tf.test.TestCase):
charset_mapper = model.CharsetMapper(charset)
with self.test_session() as sess:
tf.tables_initializer().run()
tf.compat.v1.tables_initializer().run()
text = sess.run(charset_mapper.get_text(ids))
self.assertAllEqual(text, [b'hello', b'world'])
......
......@@ -111,12 +111,12 @@ class SequenceLayerBase(object):
self._mparams = method_params
self._net = net
self._labels_one_hot = labels_one_hot
self._batch_size = tf.shape(net)[0]
self._batch_size = tf.shape(input=net)[0]
# Initialize parameters for char logits which will be computed on the fly
# inside an LSTM decoder.
self._char_logits = {}
regularizer = slim.l2_regularizer(self._mparams.weight_decay)
regularizer = tf.keras.regularizers.l2(0.5 * (self._mparams.weight_decay))
self._softmax_w = slim.model_variable(
'softmax_w',
[self._mparams.num_lstm_units, self._params.num_char_classes],
......@@ -124,7 +124,7 @@ class SequenceLayerBase(object):
regularizer=regularizer)
self._softmax_b = slim.model_variable(
'softmax_b', [self._params.num_char_classes],
initializer=tf.zeros_initializer(),
initializer=tf.compat.v1.zeros_initializer(),
regularizer=regularizer)
@abc.abstractmethod
......@@ -203,8 +203,8 @@ class SequenceLayerBase(object):
A tensor with shape [batch_size, num_char_classes]
"""
if char_index not in self._char_logits:
self._char_logits[char_index] = tf.nn.xw_plus_b(inputs, self._softmax_w,
self._softmax_b)
self._char_logits[char_index] = tf.compat.v1.nn.xw_plus_b(inputs, self._softmax_w,
self._softmax_b)
return self._char_logits[char_index]
def char_one_hot(self, logit):
......@@ -216,7 +216,7 @@ class SequenceLayerBase(object):
Returns:
A tensor with shape [batch_size, num_char_classes]
"""
prediction = tf.argmax(logit, axis=1)
prediction = tf.argmax(input=logit, axis=1)
return slim.one_hot_encoding(prediction, self._params.num_char_classes)
def get_input(self, prev, i):
......@@ -244,10 +244,10 @@ class SequenceLayerBase(object):
Returns:
A tensor with shape [batch_size, seq_length, num_char_classes].
"""
with tf.variable_scope('LSTM'):
with tf.compat.v1.variable_scope('LSTM'):
first_label = self.get_input(prev=None, i=0)
decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1)
lstm_cell = tf.contrib.rnn.LSTMCell(
lstm_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(
self._mparams.num_lstm_units,
use_peepholes=False,
cell_clip=self._mparams.lstm_state_clip_value,
......@@ -259,9 +259,9 @@ class SequenceLayerBase(object):
loop_function=self.get_input,
cell=lstm_cell)
with tf.variable_scope('logits'):
with tf.compat.v1.variable_scope('logits'):
logits_list = [
tf.expand_dims(self.char_logit(logit, i), dim=1)
tf.expand_dims(self.char_logit(logit, i), axis=1)
for i, logit in enumerate(lstm_outputs)
]
......
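Side note on the char_logit rewrite further up: tf.compat.v1.nn.xw_plus_b is just a fused matmul plus bias add, so the logits it produces can be checked against the explicit form. A small self-contained sanity sketch:

import tensorflow as tf

x = tf.random.uniform([2, 4])   # stand-ins for inputs, softmax_w, softmax_b
w = tf.random.uniform([4, 3])
b = tf.random.uniform([3])
fused = tf.compat.v1.nn.xw_plus_b(x, w, b)
tf.debugging.assert_near(fused, tf.matmul(x, w) + b)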
......@@ -29,13 +29,13 @@ import sequence_layers
def fake_net(batch_size, num_features, feature_size):
return tf.convert_to_tensor(
np.random.uniform(size=(batch_size, num_features, feature_size)),
value=np.random.uniform(size=(batch_size, num_features, feature_size)),
dtype=tf.float32)
def fake_labels(batch_size, seq_length, num_char_classes):
labels_np = tf.convert_to_tensor(
np.random.randint(
value=np.random.randint(
low=0, high=num_char_classes, size=(batch_size, seq_length)))
return slim.one_hot_encoding(labels_np, num_classes=num_char_classes)
......
......@@ -96,16 +96,16 @@ def get_training_hparams():
def create_optimizer(hparams):
"""Creates optimized based on the specified flags."""
if hparams.optimizer == 'momentum':
optimizer = tf.train.MomentumOptimizer(
optimizer = tf.compat.v1.train.MomentumOptimizer(
hparams.learning_rate, momentum=hparams.momentum)
elif hparams.optimizer == 'adam':
optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
optimizer = tf.compat.v1.train.AdamOptimizer(hparams.learning_rate)
elif hparams.optimizer == 'adadelta':
optimizer = tf.train.AdadeltaOptimizer(hparams.learning_rate)
optimizer = tf.compat.v1.train.AdadeltaOptimizer(hparams.learning_rate)
elif hparams.optimizer == 'adagrad':
optimizer = tf.train.AdagradOptimizer(hparams.learning_rate)
optimizer = tf.compat.v1.train.AdagradOptimizer(hparams.learning_rate)
elif hparams.optimizer == 'rmsprop':
optimizer = tf.train.RMSPropOptimizer(
optimizer = tf.compat.v1.train.RMSPropOptimizer(
hparams.learning_rate, momentum=hparams.momentum)
return optimizer
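A minimal usage sketch for the rewritten factory above; the hparams object here is a hypothetical stand-in for what get_training_hparams builds:

class FakeHParams(object):
  optimizer = 'momentum'
  learning_rate = 0.004
  momentum = 0.9

optimizer = create_optimizer(FakeHParams())
# Equivalent to tf.compat.v1.train.MomentumOptimizer(0.004, momentum=0.9).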
......@@ -154,14 +154,14 @@ def train(loss, init_fn, hparams):
def prepare_training_dir():
if not tf.gfile.Exists(FLAGS.train_log_dir):
if not tf.io.gfile.exists(FLAGS.train_log_dir):
logging.info('Create a new training directory %s', FLAGS.train_log_dir)
tf.gfile.MakeDirs(FLAGS.train_log_dir)
tf.io.gfile.makedirs(FLAGS.train_log_dir)
else:
if FLAGS.reset_train_dir:
logging.info('Reset the training directory %s', FLAGS.train_log_dir)
tf.gfile.DeleteRecursively(FLAGS.train_log_dir)
tf.gfile.MakeDirs(FLAGS.train_log_dir)
tf.io.gfile.rmtree(FLAGS.train_log_dir)
tf.io.gfile.makedirs(FLAGS.train_log_dir)
else:
logging.info('Use already existing training directory %s',
FLAGS.train_log_dir)
......@@ -169,7 +169,7 @@ def prepare_training_dir():
def calculate_graph_metrics():
param_stats = model_analyzer.print_model_analysis(
tf.get_default_graph(),
tf.compat.v1.get_default_graph(),
tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
return param_stats.total_parameters
......@@ -186,7 +186,7 @@ def main(_):
# If ps_tasks is zero, the local device is used. When using multiple
# (non-local) replicas, the ReplicaDeviceSetter distributes the variables
# across the different devices.
device_setter = tf.train.replica_device_setter(
device_setter = tf.compat.v1.train.replica_device_setter(
FLAGS.ps_tasks, merge_devices=True)
with tf.device(device_setter):
data = data_provider.get_data(
......
......@@ -37,16 +37,16 @@ def logits_to_log_prob(logits):
probabilities.
"""
with tf.variable_scope('log_probabilities'):
with tf.compat.v1.variable_scope('log_probabilities'):
reduction_indices = len(logits.shape.as_list()) - 1
max_logits = tf.reduce_max(
logits, reduction_indices=reduction_indices, keep_dims=True)
input_tensor=logits, axis=reduction_indices, keepdims=True)
safe_logits = tf.subtract(logits, max_logits)
sum_exp = tf.reduce_sum(
tf.exp(safe_logits),
reduction_indices=reduction_indices,
keep_dims=True)
log_probs = tf.subtract(safe_logits, tf.log(sum_exp))
input_tensor=tf.exp(safe_logits),
axis=reduction_indices,
keepdims=True)
log_probs = tf.subtract(safe_logits, tf.math.log(sum_exp))
return log_probs
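The rewritten function is still a numerically stable log-softmax, so it can be spot-checked against tf.nn.log_softmax. A small sanity sketch (not part of the diff), assuming eager TF2:

import numpy as np
import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1], [0.0, 0.0, 0.0]])
np.testing.assert_allclose(logits_to_log_prob(logits).numpy(),
                           tf.nn.log_softmax(logits).numpy(), rtol=1e-6)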
......@@ -91,7 +91,7 @@ def ConvertAllInputsToTensors(func):
"""
def FuncWrapper(*args):
tensors = [tf.convert_to_tensor(a) for a in args]
tensors = [tf.convert_to_tensor(value=a) for a in args]
return func(*tensors)
return FuncWrapper
......@@ -109,6 +109,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### MobileDet GPU
We have released SSDLite with MobileDet GPU backbone, which achieves 17% higher
mAP than the MobileNetV2 SSDLite baseline (27.5 mAP vs 23.5 mAP) on an NVIDIA
Jetson Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### Context R-CNN
We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
......@@ -39,6 +39,7 @@ from object_detection.protos import losses_pb2
from object_detection.protos import model_pb2
from object_detection.utils import label_map_util
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import tf_version
## Feature Extractors for TF
......@@ -48,6 +49,7 @@ from object_detection.utils import tf_version
# pylint: disable=g-import-not-at-top
if tf_version.is_tf2():
from object_detection.models import center_net_hourglass_feature_extractor
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models import center_net_resnet_feature_extractor
from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
......@@ -140,11 +142,18 @@ if tf_version.is_tf2():
CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
'resnet_v1_18_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn,
'resnet_v1_34_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_34_fpn,
'resnet_v1_50_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn,
'resnet_v1_101_fpn':
center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn,
'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104,
'hourglass_104':
center_net_hourglass_feature_extractor.hourglass_104,
'mobilenet_v2':
center_net_mobilenet_v2_feature_extractor.mobilenet_v2,
}
FEATURE_EXTRACTOR_MAPS = [
......@@ -648,8 +657,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
second_stage_localization_loss_weight)
crop_and_resize_fn = (
ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if frcnn_config.use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
clip_anchors_to_image = (
frcnn_config.clip_anchors_to_image)
......
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "inference_from_saved_model_tf2_colab.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "cT5cdSLPX0ui"
},
"source": [
"# Intro to Object Detection Colab\n",
"\n",
"Welcome to the object detection colab! This demo will take you through the steps of running an \"out-of-the-box\" detection model in SavedModel format on a collection of images.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "vPs64QA1Zdov"
},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OBzb04bdNGM8",
"colab_type": "code",
"colab": {}
},
"source": [
"!pip install -U --pre tensorflow==\"2.2.0\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "NgSXyvKSNHIl",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rhpPgW7TNLs6",
"colab_type": "code",
"colab": {}
},
"source": [
"# Install the Object Detection API\n",
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"python -m pip install ."
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "yn5_uV1HLvaz",
"colab": {}
},
"source": [
"import io\n",
"import os\n",
"import scipy.misc\n",
"import numpy as np\n",
"import six\n",
"import time\n",
"\n",
"from six import BytesIO\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"\n",
"import tensorflow as tf\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"\n",
"%matplotlib inline"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "-y9R0Xllefec",
"colab": {}
},
"source": [
"def load_image_into_numpy_array(path):\n",
" \"\"\"Load an image from file into a numpy array.\n",
"\n",
" Puts image into numpy array to feed into tensorflow graph.\n",
" Note that by convention we put it into a numpy array with shape\n",
" (height, width, channels), where channels=3 for RGB.\n",
"\n",
" Args:\n",
" path: a file path (this can be local or on colossus)\n",
"\n",
" Returns:\n",
" uint8 numpy array with shape (img_height, img_width, 3)\n",
" \"\"\"\n",
" img_data = tf.io.gfile.GFile(path, 'rb').read()\n",
" image = Image.open(BytesIO(img_data))\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)\n",
"\n",
"# Load the COCO Label Map\n",
"category_index = {\n",
" 1: {'id': 1, 'name': 'person'},\n",
" 2: {'id': 2, 'name': 'bicycle'},\n",
" 3: {'id': 3, 'name': 'car'},\n",
" 4: {'id': 4, 'name': 'motorcycle'},\n",
" 5: {'id': 5, 'name': 'airplane'},\n",
" 6: {'id': 6, 'name': 'bus'},\n",
" 7: {'id': 7, 'name': 'train'},\n",
" 8: {'id': 8, 'name': 'truck'},\n",
" 9: {'id': 9, 'name': 'boat'},\n",
" 10: {'id': 10, 'name': 'traffic light'},\n",
" 11: {'id': 11, 'name': 'fire hydrant'},\n",
" 13: {'id': 13, 'name': 'stop sign'},\n",
" 14: {'id': 14, 'name': 'parking meter'},\n",
" 15: {'id': 15, 'name': 'bench'},\n",
" 16: {'id': 16, 'name': 'bird'},\n",
" 17: {'id': 17, 'name': 'cat'},\n",
" 18: {'id': 18, 'name': 'dog'},\n",
" 19: {'id': 19, 'name': 'horse'},\n",
" 20: {'id': 20, 'name': 'sheep'},\n",
" 21: {'id': 21, 'name': 'cow'},\n",
" 22: {'id': 22, 'name': 'elephant'},\n",
" 23: {'id': 23, 'name': 'bear'},\n",
" 24: {'id': 24, 'name': 'zebra'},\n",
" 25: {'id': 25, 'name': 'giraffe'},\n",
" 27: {'id': 27, 'name': 'backpack'},\n",
" 28: {'id': 28, 'name': 'umbrella'},\n",
" 31: {'id': 31, 'name': 'handbag'},\n",
" 32: {'id': 32, 'name': 'tie'},\n",
" 33: {'id': 33, 'name': 'suitcase'},\n",
" 34: {'id': 34, 'name': 'frisbee'},\n",
" 35: {'id': 35, 'name': 'skis'},\n",
" 36: {'id': 36, 'name': 'snowboard'},\n",
" 37: {'id': 37, 'name': 'sports ball'},\n",
" 38: {'id': 38, 'name': 'kite'},\n",
" 39: {'id': 39, 'name': 'baseball bat'},\n",
" 40: {'id': 40, 'name': 'baseball glove'},\n",
" 41: {'id': 41, 'name': 'skateboard'},\n",
" 42: {'id': 42, 'name': 'surfboard'},\n",
" 43: {'id': 43, 'name': 'tennis racket'},\n",
" 44: {'id': 44, 'name': 'bottle'},\n",
" 46: {'id': 46, 'name': 'wine glass'},\n",
" 47: {'id': 47, 'name': 'cup'},\n",
" 48: {'id': 48, 'name': 'fork'},\n",
" 49: {'id': 49, 'name': 'knife'},\n",
" 50: {'id': 50, 'name': 'spoon'},\n",
" 51: {'id': 51, 'name': 'bowl'},\n",
" 52: {'id': 52, 'name': 'banana'},\n",
" 53: {'id': 53, 'name': 'apple'},\n",
" 54: {'id': 54, 'name': 'sandwich'},\n",
" 55: {'id': 55, 'name': 'orange'},\n",
" 56: {'id': 56, 'name': 'broccoli'},\n",
" 57: {'id': 57, 'name': 'carrot'},\n",
" 58: {'id': 58, 'name': 'hot dog'},\n",
" 59: {'id': 59, 'name': 'pizza'},\n",
" 60: {'id': 60, 'name': 'donut'},\n",
" 61: {'id': 61, 'name': 'cake'},\n",
" 62: {'id': 62, 'name': 'chair'},\n",
" 63: {'id': 63, 'name': 'couch'},\n",
" 64: {'id': 64, 'name': 'potted plant'},\n",
" 65: {'id': 65, 'name': 'bed'},\n",
" 67: {'id': 67, 'name': 'dining table'},\n",
" 70: {'id': 70, 'name': 'toilet'},\n",
" 72: {'id': 72, 'name': 'tv'},\n",
" 73: {'id': 73, 'name': 'laptop'},\n",
" 74: {'id': 74, 'name': 'mouse'},\n",
" 75: {'id': 75, 'name': 'remote'},\n",
" 76: {'id': 76, 'name': 'keyboard'},\n",
" 77: {'id': 77, 'name': 'cell phone'},\n",
" 78: {'id': 78, 'name': 'microwave'},\n",
" 79: {'id': 79, 'name': 'oven'},\n",
" 80: {'id': 80, 'name': 'toaster'},\n",
" 81: {'id': 81, 'name': 'sink'},\n",
" 82: {'id': 82, 'name': 'refrigerator'},\n",
" 84: {'id': 84, 'name': 'book'},\n",
" 85: {'id': 85, 'name': 'clock'},\n",
" 86: {'id': 86, 'name': 'vase'},\n",
" 87: {'id': 87, 'name': 'scissors'},\n",
" 88: {'id': 88, 'name': 'teddy bear'},\n",
" 89: {'id': 89, 'name': 'hair drier'},\n",
" 90: {'id': 90, 'name': 'toothbrush'},\n",
"}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "QwcBC2TlPSwg",
"colab_type": "code",
"colab": {}
},
"source": [
"# Download the saved model and put it into models/research/object_detection/test_data/\n",
"!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!tar -xf efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!mv efficientdet_d5_coco17_tpu-32/ models/research/object_detection/test_data/"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "Z2p-PmKLYCVU",
"colab": {}
},
"source": [
"start_time = time.time()\n",
"tf.keras.backend.clear_session()\n",
"detect_fn = tf.saved_model.load('models/research/object_detection/test_data/efficientdet_d5_coco17_tpu-32/saved_model/')\n",
"end_time = time.time()\n",
"elapsed_time = end_time - start_time\n",
"print('Elapsed time: ' + str(elapsed_time) + 's')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "vukkhd5-9NSL",
"colab": {}
},
"source": [
"import time\n",
"\n",
"image_dir = 'models/research/object_detection/test_images'\n",
"\n",
"elapsed = []\n",
"for i in range(2):\n",
" image_path = os.path.join(image_dir, 'image' + str(i + 1) + '.jpg')\n",
" image_np = load_image_into_numpy_array(image_path)\n",
" input_tensor = np.expand_dims(image_np, 0)\n",
" start_time = time.time()\n",
" detections = detect_fn(input_tensor)\n",
" end_time = time.time()\n",
" elapsed.append(end_time - start_time)\n",
"\n",
" plt.rcParams['figure.figsize'] = [42, 21]\n",
" label_id_offset = 1\n",
" image_np_with_detections = image_np.copy()\n",
" viz_utils.visualize_boxes_and_labels_on_image_array(\n",
" image_np_with_detections,\n",
" detections['detection_boxes'][0].numpy(),\n",
" detections['detection_classes'][0].numpy().astype(np.int32),\n",
" detections['detection_scores'][0].numpy(),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" max_boxes_to_draw=200,\n",
" min_score_thresh=.40,\n",
" agnostic_mode=False)\n",
" plt.subplot(2, 1, i+1)\n",
" plt.imshow(image_np_with_detections)\n",
"\n",
"mean_elapsed = sum(elapsed) / float(len(elapsed))\n",
"print('Elapsed time: ' + str(mean_elapsed) + ' second per image')"
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
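For readers skimming the diff, the notebook above boils down to loading the exported SavedModel and calling it on a batched uint8 image. A condensed sketch (the model path matches the notebook's download cell; the zero image is only a stand-in):

import numpy as np
import tensorflow as tf

detect_fn = tf.saved_model.load(
    'models/research/object_detection/test_data/'
    'efficientdet_d5_coco17_tpu-32/saved_model/')
image = np.zeros((512, 512, 3), dtype=np.uint8)
detections = detect_fn(np.expand_dims(image, 0))
print(detections['detection_boxes'][0].shape)  # (max_detections, 4)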
......@@ -1600,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
return (batch_indices, batch_offsets, batch_weights)
def _resize_masks(masks, height, width, method):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
masks = tf2.image.resize(
masks[:, :, :, tf.newaxis],
size=(height, width),
method=method)
return masks[:, :, :, 0]
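Quick usage sketch for the new _resize_masks helper (toy values, assuming eager TF2): it adds a channel axis so TF2's image resize can operate on a stack of [num_instances, height, width] masks, then drops that axis again.

masks = tf.cast(tf.random.uniform([3, 8, 8]) > 0.5, tf.float32)
resized = _resize_masks(masks, height=4, width=4, method='nearest')
assert resized.shape == (3, 4, 4)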
class CenterNetMaskTargetAssigner(object):
"""Wrapper to compute targets for segmentation masks."""
......@@ -1641,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
segmentation_targets_list = []
for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
# Resize segmentation masks to conform to output dimensions. Use TF2
# image resize because TF1's version is buggy:
# https://yaqs.corp.google.com/eng/q/4970450458378240
gt_masks = tf2.image.resize(
gt_masks[:, :, :, tf.newaxis],
size=(output_height, output_width),
method=mask_resize_method)
gt_masks = _resize_masks(gt_masks, output_height, output_width,
mask_resize_method)
gt_masks = gt_masks[:, :, :, tf.newaxis]
gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
# Shape: [h, w, num_classes].
segmentations_for_image = tf.reduce_max(
......@@ -1771,3 +1778,120 @@ class CenterNetDensePoseTargetAssigner(object):
batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
batch_weights = tf.concat(batch_weights, axis=0)
return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
def filter_mask_overlap_min_area(masks):
"""If a pixel belongs to 2 instances, remove it from the larger instance."""
num_instances = tf.shape(masks)[0]
def _filter_min_area():
"""Helper function to filter non empty masks."""
areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
per_pixel_area = masks * areas
# Make sure background is ignored in argmin.
per_pixel_area = (masks * per_pixel_area +
(1 - masks) * per_pixel_area.dtype.max)
min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)
filtered_masks = (
tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
==
min_index[tf.newaxis, :, :]
)
return tf.cast(filtered_masks, tf.float32) * masks
return tf.cond(num_instances > 0, _filter_min_area,
lambda: masks)
def filter_mask_overlap(masks, method='min_area'):
if method == 'min_area':
return filter_mask_overlap_min_area(masks)
else:
raise ValueError('Unknown mask overlap filter type - {}'.format(method))
class CenterNetCornerOffsetTargetAssigner(object):
"""Wrapper to compute corner offsets for boxes using masks."""
def __init__(self, stride, overlap_resolution='min_area'):
"""Initializes the corner offset target assigner.
Args:
stride: int, the stride of the network in output pixels.
overlap_resolution: string, specifies how we handle overlapping
instance masks. Currently only 'min_area' is supported, which assigns
overlapping pixels to the instance with the minimum area.
"""
self._stride = stride
self._overlap_resolution = overlap_resolution
def assign_corner_offset_targets(
self, gt_boxes_list, gt_masks_list):
"""Computes the corner offset targets and foreground map.
For each pixel that is part of any object's foreground, this function
computes the relative offsets to the top-left and bottom-right corners of
that instance's bounding box. It also returns a foreground map to indicate
which pixels contain valid corner offsets.
Args:
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
Returns:
corner_offsets: A float tensor of shape [batch_size, height, width, 4]
containing, in order, the (y, x) offsets to the top left corner and
the (y, x) offsets to the bottom right corner for each foreground pixel.
foreground: A float tensor of shape [batch_size, height, width] in which
each pixel is set to 1 if it is a part of any instance's foreground
(and thus contains valid corner offsets) and 0 otherwise.
"""
_, input_height, input_width = (
shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
output_height = input_height // self._stride
output_width = input_width // self._stride
y_grid, x_grid = tf.meshgrid(
tf.range(output_height), tf.range(output_width),
indexing='ij')
y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)
corner_targets = []
foreground_targets = []
for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
gt_masks = _resize_masks(gt_masks, output_height, output_width,
method=ResizeMethod.NEAREST_NEIGHBOR)
gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)
ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
ymin, ymax = ymin * output_height, ymax * output_height
xmin, xmax = xmin * output_width, xmax * output_width
top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
tf.float32)
foreground_targets.append(foreground_target)
corner_target = tf.stack([
tf.reduce_sum(top_y * gt_masks, axis=0),
tf.reduce_sum(left_x * gt_masks, axis=0),
tf.reduce_sum(bottom_y * gt_masks, axis=0),
tf.reduce_sum(right_x * gt_masks, axis=0),
], axis=2)
corner_targets.append(corner_target)
return (tf.stack(corner_targets, axis=0),
tf.stack(foreground_targets, axis=0))
......@@ -1999,6 +1999,181 @@ class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
self.assertAllClose(expected_batch_weights, batch_weights)
class CornerOffsetTargetAssignerTest(test_case.TestCase):
def test_filter_overlap_min_area_empty(self):
"""Test that empty masks work on CPU."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = self.execute_cpu(graph_fn, [np.zeros((0, 5, 5), dtype=np.float32)])
self.assertEqual(masks.shape, (0, 5, 5))
def test_filter_overlap_min_area(self):
"""Test the object with min. area is selected instead of overlap."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = np.zeros((3, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0
masks[1, :3, :3] = 1.0
masks[2, 3, 3] = 1.0
masks = self.execute(graph_fn, [masks])
self.assertAllClose(masks[0],
[[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[1],
[[0, 0, 1, 0],
[0, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[2],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
def test_assign_corner_offset_single_object(self):
"""Test that corner offsets are correct with a single object."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.]])
]
mask = np.zeros((1, 4, 4), dtype=np.float32)
mask[0, 1:3, 1:3] = 1.0
masks = [tf.constant(mask)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertAllClose(foreground[0],
[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, -1, -1, 0],
[0, -2, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, -1, -2, 0],
[0, -1, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[0, 0, 0, 0],
[0, 3, 3, 0],
[0, 2, 2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[0, 0, 0, 0],
[0, 3, 2, 0],
[0, 3, 2, 0],
[0, 0, 0, 0]])
def test_assign_corner_offset_multiple_objects(self):
"""Test corner offsets are correct with multiple objects."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
]
mask1 = np.zeros((2, 4, 4), dtype=np.float32)
mask1[0, 0, 0] = 1.0
mask1[0, 3, 3] = 1.0
mask2 = np.zeros((2, 4, 4), dtype=np.float32)
mask2[0, :2, :2] = 1.0
mask2[1, 1:, 1:] = 1.0
masks = [tf.constant(mask1), tf.constant(mask2)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
self.assertEqual(foreground.shape, (2, 4, 4))
self.assertAllClose(foreground[0],
[[1, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(foreground[1],
[[1, 1, 0, 0],
[1, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 0],
[[0, 0, 0, 0],
[-1, -1, 0, 0],
[0, -1, -1, -1],
[0, -2, -2, -2]])
self.assertAllClose(corner_offsets[1, :, :, 1],
[[0, -1, 0, 0],
[0, -1, -1, -2],
[0, 0, -1, -2],
[0, 0, -1, -2]])
self.assertAllClose(corner_offsets[1, :, :, 2],
[[1, 1, 0, 0],
[0, 0, 3, 3],
[0, 2, 2, 2],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 3],
[[1, 0, 0, 0],
[1, 0, 2, 1],
[0, 3, 2, 1],
[0, 3, 2, 1]])
def test_assign_corner_offsets_no_objects(self):
"""Test assignment works with empty input on cpu."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.zeros((0, 4), dtype=tf.float32)
]
masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute_cpu(graph_fn, [])
self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
self.assertAllClose(foreground, np.zeros((1, 5, 5)))
if __name__ == '__main__':
tf.enable_v2_behavior()
tf.test.main()
......@@ -40,6 +40,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### June 26th, 2020
We have released SSDLite with MobileDet GPU backbone, which achieves 17% higher
mAP than the MobileNetV2 SSDLite baseline (27.5 mAP vs 23.5 mAP) on an NVIDIA
Jetson Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### June 17th, 2020
We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
......@@ -1094,8 +1094,12 @@ def get_reduce_to_frame_fn(input_reader_config, is_training):
num_frames = tf.cast(
tf.shape(tensor_dict[fields.InputDataFields.source_id])[0],
dtype=tf.int32)
frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
dtype=tf.int32)
if input_reader_config.frame_index == -1:
frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
dtype=tf.int32)
else:
frame_index = tf.constant(input_reader_config.frame_index,
dtype=tf.int32)
out_tensor_dict = {}
for key in tensor_dict:
if key in fields.SEQUENCE_FIELDS:
......
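The frame-selection rule added above, restated as a standalone helper (a sketch only; the real logic lives inline in get_reduce_to_frame_fn): frame_index == -1 keeps the previous behaviour of sampling a random frame, while any non-negative value pins the selected frame.

import tensorflow as tf

def select_frame_index(num_frames, frame_index):
  if frame_index == -1:
    return tf.random.uniform((), minval=0, maxval=num_frames, dtype=tf.int32)
  return tf.constant(frame_index, dtype=tf.int32)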
......@@ -61,7 +61,7 @@ def _get_configs_for_model(model_name):
configs, kwargs_dict=override_dict)
def _get_configs_for_model_sequence_example(model_name):
def _get_configs_for_model_sequence_example(model_name, frame_index=-1):
"""Returns configurations for model."""
fname = os.path.join(tf.resource_loader.get_data_files_path(),
'test_data/' + model_name + '.config')
......@@ -74,7 +74,8 @@ def _get_configs_for_model_sequence_example(model_name):
override_dict = {
'train_input_path': data_path,
'eval_input_path': data_path,
'label_map_path': label_map_path
'label_map_path': label_map_path,
'frame_index': frame_index
}
return config_util.merge_external_params_with_configs(
configs, kwargs_dict=override_dict)
......@@ -312,6 +313,46 @@ class InputFnTest(test_case.TestCase, parameterized.TestCase):
tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_context_rcnn_resnet50_train_input_with_sequence_example_frame_index(
self, train_batch_size=8):
"""Tests the training input function for FasterRcnnResnet50."""
configs = _get_configs_for_model_sequence_example(
'context_rcnn_camera_trap', frame_index=2)
model_config = configs['model']
train_config = configs['train_config']
train_config.batch_size = train_batch_size
train_input_fn = inputs.create_train_input_fn(
train_config, configs['train_input_config'], model_config)
features, labels = _make_initializable_iterator(train_input_fn()).get_next()
self.assertAllEqual([train_batch_size, 640, 640, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual([train_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[train_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[train_batch_size, 100],
labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
def test_ssd_inceptionV2_train_input(self):
"""Tests the training input function for SSDInceptionV2."""
configs = _get_configs_for_model('ssd_inception_v2_pets')
......
......@@ -332,7 +332,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
"""
box_features = self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
[features_to_crop], proposal_boxes_normalized, None,
[self._initial_crop_size, self._initial_crop_size])
attention_features = self._context_feature_extract_fn(
......
......@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -362,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......
......@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop = [features_to_crop]
num_levels = len(features_to_crop)
box_levels = None
if num_levels != 1:
# If there are multiple levels to select, get the box levels
box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
1.0/224, proposal_boxes_normalized)
cropped_regions = self._flatten_first_two_dimensions(
self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized,
features_to_crop, proposal_boxes_normalized, box_levels,
[self._initial_crop_size, self._initial_crop_size]))
return self._maxpool_layer(cropped_regions)
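Hedged sketch of the call convention this change moves to, grounded in the calls above: a list of feature maps, normalized boxes, optional per-box level indices (None for a single level), and a crop size. The expected output shape in the comment is an assumption, not verified here.

import tensorflow as tf
from object_detection.utils import spatial_transform_ops as spatial_ops

features = tf.random.uniform([2, 32, 32, 8])          # [batch, H, W, C]
boxes = tf.constant([[[0.0, 0.0, 0.5, 0.5],
                      [0.25, 0.25, 1.0, 1.0]]] * 2)   # [batch, num_boxes, 4]
crops = spatial_ops.multilevel_native_crop_and_resize(
    [features], boxes, None, [7, 7])
print(crops.shape)  # expected: (2, 2, 7, 7, 8)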
......@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
image_shape[1], image_shape[2], check_range=False).get()
flat_cropped_gt_mask = self._crop_and_resize_fn(
tf.expand_dims(flat_gt_masks, -1),
tf.expand_dims(flat_normalized_proposals, axis=1),
[tf.expand_dims(flat_gt_masks, -1)],
tf.expand_dims(flat_normalized_proposals, axis=1), None,
[mask_height, mask_width])
# Without stopping gradients into cropped groundtruth masks the
# performance with 100-padded groundtruth masks when batch size > 1 is
......
......@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
from object_detection.protos import post_processing_pb2
from object_detection.utils import ops
from object_detection.utils import spatial_transform_ops as spatial_ops
from object_detection.utils import test_case
from object_detection.utils import test_utils
from object_detection.utils import tf_version
......@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
max_negatives_per_positive=None)
crop_and_resize_fn = (
ops.matmul_crop_and_resize
if use_matmul_crop_and_resize else ops.native_crop_and_resize)
spatial_ops.multilevel_matmul_crop_and_resize
if use_matmul_crop_and_resize
else spatial_ops.multilevel_native_crop_and_resize)
common_kwargs = {
'is_training':
is_training,
......
......@@ -414,7 +414,7 @@ def train_loop(
train_steps=None,
use_tpu=False,
save_final_config=False,
checkpoint_every_n=1000,
checkpoint_every_n=5000,
checkpoint_max_to_keep=7,
**kwargs):
"""Trains a model using eager + functions.
......@@ -855,6 +855,7 @@ def eval_continuously(
checkpoint_dir=None,
wait_interval=180,
timeout=3600,
eval_index=None,
**kwargs):
"""Run continuous evaluation of a detection model eagerly.
......@@ -884,6 +885,8 @@ def eval_continuously(
new checkpoint.
timeout: The maximum number of seconds to wait for a checkpoint. Execution
will terminate if no new checkpoints are found after these many seconds.
eval_index: int, optional. If given, only evaluates the dataset at the given
index.
**kwargs: Additional keyword arguments for configuration override.
"""
......@@ -937,6 +940,11 @@ def eval_continuously(
model=detection_model)
eval_inputs.append((eval_input_config.name, next_eval_input))
if eval_index is not None:
eval_inputs = [eval_inputs[eval_index]]
tf.logging.info('eval_index selected - {}'.format(
eval_inputs))
global_step = tf.compat.v2.Variable(
0, trainable=False, dtype=tf.compat.v2.dtypes.int64)
......
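A hypothetical invocation of eval_continuously with the new flag; every argument value below is a placeholder, and only eval_index is the point of the example:

from object_detection import model_lib_v2

model_lib_v2.eval_continuously(
    pipeline_config_path='path/to/pipeline.config',
    model_dir='path/to/model_dir',
    checkpoint_dir='path/to/checkpoints',
    wait_interval=180,
    timeout=3600,
    eval_index=0)  # evaluate only the first eval input config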