Commit afd5579f authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

Merge remote-tracking branch 'upstream/master' into context_tf2

parents dcd96e02 567bd18d
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MobileNet V2[1] feature extractor for CenterNet[2] meta architecture.
[1]: https://arxiv.org/abs/1801.04381
[2]: https://arxiv.org/abs/1904.07850
"""
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures import center_net_meta_arch
from object_detection.models.keras_models import mobilenet_v2 as mobilenetv2
class CenterNetMobileNetV2FeatureExtractor(
    center_net_meta_arch.CenterNetFeatureExtractor):
  """CenterNet feature extractor built on top of a MobileNet V2 backbone.

  The backbone output is refined by three upsampling stages, and the whole
  pipeline (backbone plus upsampling) is wrapped in a single Keras model.
  """

  def __init__(self,
               mobilenet_v2_net,
               channel_means=(0., 0., 0.),
               channel_stds=(1., 1., 1.),
               bgr_ordering=False):
    """Initializes the feature extractor.

    Args:
      mobilenet_v2_net: The underlying mobilenet_v2 network to use. Its
        `input` tensor becomes the input of the final model.
      channel_means: A tuple of floats, denoting the mean of each channel
        which will be subtracted from it. Forwarded to the base class.
      channel_stds: A tuple of floats, denoting the standard deviation of each
        channel. Each channel will be divided by its standard deviation value.
        Forwarded to the base class.
      bgr_ordering: bool, if set will change the channel ordering to BGR
        (blue, green, red). Forwarded to the base class.
    """
    super(CenterNetMobileNetV2FeatureExtractor, self).__init__(
        channel_means=channel_means,
        channel_stds=channel_stds,
        bgr_ordering=bgr_ordering)

    keras_layers = tf.keras.layers

    # Hold the raw backbone first; it is replaced by the full model below.
    self._network = mobilenet_v2_net
    backbone_input = self._network.input
    features = self._network(backbone_input)

    # TODO(nkhadke): Try out MobileNet+FPN next (skip connections are cheap and
    # should help with performance).
    # MobileNet alone maps a 224x224x3 volume to 7x7x1280, i.e. a stride of 32.
    # Three stride-2 upsampling stages bring that down to the target stride
    # of 4.
    for num_filters in (256, 128, 64):
      # Step 1: a plain 1x1 convolution is used in place of a deformable
      # convolution.
      features = keras_layers.Conv2D(
          filters=num_filters, kernel_size=1, strides=1,
          padding='same')(features)
      features = keras_layers.BatchNormalization()(features)
      features = keras_layers.ReLU()(features)

      # Step 2: the transposed convolution keeps its default initialization
      # rather than being initialized to perform bilinear upsampling.
      features = keras_layers.Conv2DTranspose(
          filters=num_filters, kernel_size=3, strides=2,
          padding='same')(features)
      features = keras_layers.BatchNormalization()(features)
      features = keras_layers.ReLU()(features)

    self._network = tf.keras.models.Model(
        inputs=backbone_input, outputs=features)

  def preprocess(self, resized_inputs):
    """Runs the base-class preprocessing, then the MobileNet V2 Keras one."""
    normalized_inputs = super(CenterNetMobileNetV2FeatureExtractor,
                              self).preprocess(resized_inputs)
    return tf.keras.applications.mobilenet_v2.preprocess_input(
        normalized_inputs)

  def load_feature_extractor_weights(self, path):
    """Loads weights from `path` into the wrapped Keras model."""
    self._network.load_weights(path)

  def get_base_model(self):
    """Returns the wrapped Keras model (same object as `get_model`)."""
    return self._network

  def call(self, inputs):
    """Returns a one-element list holding the model output for `inputs`."""
    features = self._network(inputs)
    return [features]

  @property
  def out_stride(self):
    """The stride in the output image of the network."""
    return 4

  @property
  def num_feature_outputs(self):
    """The number of feature outputs returned by the feature extractor."""
    return 1

  def get_model(self):
    """Returns the wrapped Keras model (same object as `get_base_model`)."""
    return self._network
def mobilenet_v2(channel_means, channel_stds, bgr_ordering):
  """Builds a CenterNet feature extractor backed by MobileNetV2.

  Args:
    channel_means: Per-channel means, forwarded to the feature extractor.
    channel_stds: Per-channel standard deviations, forwarded to the feature
      extractor.
    bgr_ordering: Channel-ordering flag, forwarded to the feature extractor.

  Returns:
    A `CenterNetMobileNetV2FeatureExtractor` wrapping a freshly created
    MobileNetV2 network without its classification top.
  """
  # The backbone is currently always created with 'is_training' set to True.
  backbone = mobilenetv2.mobilenet_v2(True, include_top=False)
  extractor_kwargs = {
      'channel_means': channel_means,
      'channel_stds': channel_stds,
      'bgr_ordering': bgr_ordering,
  }
  return CenterNetMobileNetV2FeatureExtractor(backbone, **extractor_kwargs)
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Testing mobilenet_v2 feature extractor for CenterNet."""
import unittest
import numpy as np
import tensorflow.compat.v1 as tf
from object_detection.models import center_net_mobilenet_v2_feature_extractor
from object_detection.models.keras_models import mobilenet_v2
from object_detection.utils import test_case
from object_detection.utils import tf_version
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class CenterNetMobileNetV2FeatureExtractorTest(test_case.TestCase):
  """Shape test for the CenterNet MobileNet V2 feature extractor."""

  def test_center_net_mobilenet_v2_feature_extractor(self):
    """A batch of 8 224x224 images must yield an (8, 56, 56, 64) output."""
    extractor_lib = center_net_mobilenet_v2_feature_extractor
    backbone = mobilenet_v2.mobilenet_v2(True, include_top=False)
    model = extractor_lib.CenterNetMobileNetV2FeatureExtractor(backbone)

    def graph_fn():
      blank_images = np.zeros((8, 224, 224, 3), dtype=np.float32)
      return model(model.preprocess(blank_images))

    outputs = self.execute(graph_fn, [])
    expected_shape = (8, 56, 56, 64)
    self.assertEqual(outputs.shape, expected_shape)
# Script entry point: run every test case defined in this module.
if __name__ == '__main__':
  tf.test.main()
......@@ -21,9 +21,14 @@
import tensorflow.compat.v1 as tf
from object_detection.meta_architectures.center_net_meta_arch import CenterNetFeatureExtractor
from object_detection.models.keras_models import resnet_v1
_RESNET_MODEL_OUTPUT_LAYERS = {
'resnet_v1_18': ['conv2_block2_out', 'conv3_block2_out',
'conv4_block2_out', 'conv5_block2_out'],
'resnet_v1_34': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block6_out', 'conv5_block3_out'],
'resnet_v1_50': ['conv2_block3_out', 'conv3_block4_out',
'conv4_block6_out', 'conv5_block3_out'],
'resnet_v1_101': ['conv2_block3_out', 'conv3_block4_out',
......@@ -69,6 +74,10 @@ class CenterNetResnetV1FpnFeatureExtractor(CenterNetFeatureExtractor):
self._base_model = tf.keras.applications.ResNet50(weights=None)
elif resnet_type == 'resnet_v1_101':
self._base_model = tf.keras.applications.ResNet101(weights=None)
elif resnet_type == 'resnet_v1_18':
self._base_model = resnet_v1.resnet_v1_18(weights=None)
elif resnet_type == 'resnet_v1_34':
self._base_model = resnet_v1.resnet_v1_34(weights=None)
else:
raise ValueError('Unknown Resnet Model {}'.format(resnet_type))
output_layers = _RESNET_MODEL_OUTPUT_LAYERS[resnet_type]
......@@ -174,3 +183,24 @@ def resnet_v1_50_fpn(channel_means, channel_stds, bgr_ordering):
channel_means=channel_means,
channel_stds=channel_stds,
bgr_ordering=bgr_ordering)
def resnet_v1_34_fpn(channel_means, channel_stds, bgr_ordering):
  """Builds the ResNet v1 34 FPN feature extractor.

  Args:
    channel_means: Per-channel means, forwarded to the feature extractor.
    channel_stds: Per-channel standard deviations, forwarded to the feature
      extractor.
    bgr_ordering: Channel-ordering flag, forwarded to the feature extractor.

  Returns:
    A `CenterNetResnetV1FpnFeatureExtractor` created with
    `resnet_type='resnet_v1_34'`.
  """
  extractor_kwargs = {
      'resnet_type': 'resnet_v1_34',
      'channel_means': channel_means,
      'channel_stds': channel_stds,
      'bgr_ordering': bgr_ordering,
  }
  return CenterNetResnetV1FpnFeatureExtractor(**extractor_kwargs)
def resnet_v1_18_fpn(channel_means, channel_stds, bgr_ordering):
  """Builds the ResNet v1 18 FPN feature extractor.

  Args:
    channel_means: Per-channel means, forwarded to the feature extractor.
    channel_stds: Per-channel standard deviations, forwarded to the feature
      extractor.
    bgr_ordering: Channel-ordering flag, forwarded to the feature extractor.

  Returns:
    A `CenterNetResnetV1FpnFeatureExtractor` created with
    `resnet_type='resnet_v1_18'`.
  """
  extractor_kwargs = {
      'resnet_type': 'resnet_v1_18',
      'channel_means': channel_means,
      'channel_stds': channel_stds,
      'bgr_ordering': bgr_ordering,
  }
  return CenterNetResnetV1FpnFeatureExtractor(**extractor_kwargs)
......@@ -31,6 +31,8 @@ class CenterNetResnetV1FpnFeatureExtractorTest(test_case.TestCase,
@parameterized.parameters(
{'resnet_type': 'resnet_v1_50'},
{'resnet_type': 'resnet_v1_101'},
{'resnet_type': 'resnet_v1_18'},
{'resnet_type': 'resnet_v1_34'},
)
def test_correct_output_size(self, resnet_type):
"""Verify that shape of features returned by the backbone is correct."""
......
......@@ -21,6 +21,7 @@ from __future__ import print_function
import tensorflow.compat.v1 as tf
from tensorflow.python.keras.applications import resnet
from object_detection.core import freezable_batch_norm
from object_detection.models.keras_models import model_utils
......@@ -95,11 +96,11 @@ class _LayersOverride(object):
self.regularizer = tf.keras.regularizers.l2(weight_decay)
self.initializer = tf.variance_scaling_initializer()
def _FixedPaddingLayer(self, kernel_size, rate=1):
def _FixedPaddingLayer(self, kernel_size, rate=1): # pylint: disable=invalid-name
return tf.keras.layers.Lambda(
lambda x: _fixed_padding(x, kernel_size, rate))
def Conv2D(self, filters, kernel_size, **kwargs):
def Conv2D(self, filters, kernel_size, **kwargs): # pylint: disable=invalid-name
"""Builds a Conv2D layer according to the current Object Detection config.
Overrides the Keras Resnet application's convolutions with ones that
......@@ -141,7 +142,7 @@ class _LayersOverride(object):
else:
return tf.keras.layers.Conv2D(filters, kernel_size, **kwargs)
def Activation(self, *args, **kwargs): # pylint: disable=unused-argument
def Activation(self, *args, **kwargs): # pylint: disable=unused-argument,invalid-name
"""Builds an activation layer.
Overrides the Keras application Activation layer specified by the
......@@ -163,7 +164,7 @@ class _LayersOverride(object):
else:
return tf.keras.layers.Lambda(tf.nn.relu, name=name)
def BatchNormalization(self, **kwargs):
def BatchNormalization(self, **kwargs): # pylint: disable=invalid-name
"""Builds a normalization layer.
Overrides the Keras application batch norm with the norm specified by the
......@@ -191,7 +192,7 @@ class _LayersOverride(object):
momentum=self._default_batchnorm_momentum,
**kwargs)
def Input(self, shape):
def Input(self, shape): # pylint: disable=invalid-name
"""Builds an Input layer.
Overrides the Keras application Input layer with one that uses a
......@@ -219,7 +220,7 @@ class _LayersOverride(object):
input=input_tensor, shape=[None] + shape)
return model_utils.input_layer(shape, placeholder_with_default)
def MaxPooling2D(self, pool_size, **kwargs):
def MaxPooling2D(self, pool_size, **kwargs): # pylint: disable=invalid-name
"""Builds a MaxPooling2D layer with default padding as 'SAME'.
This is specified by the default resnet arg_scope in slim.
......@@ -237,7 +238,7 @@ class _LayersOverride(object):
# Add alias as Keras also has it.
MaxPool2D = MaxPooling2D # pylint: disable=invalid-name
def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument
def ZeroPadding2D(self, padding, **kwargs): # pylint: disable=unused-argument,invalid-name
"""Replaces explicit padding in the Keras application with a no-op.
Args:
......@@ -395,3 +396,146 @@ def resnet_v1_152(batchnorm_training,
return tf.keras.applications.resnet.ResNet152(
layers=layers_override, **kwargs)
# pylint: enable=invalid-name
# The following code is based on the existing Keras ResNet model pattern:
# google3/third_party/tensorflow/python/keras/applications/resnet.py
def block_basic(x,
                filters,
                kernel_size=3,
                stride=1,
                conv_shortcut=False,
                name=None):
  """Builds one two-convolution residual block for ResNet18/34.

  The input is batch-normalized and ReLU-activated first (pre-activation).
  The main branch then runs two zero-padded convolutions, each followed by
  batch normalization and ReLU, and its result is added to the shortcut.

  Arguments:
    x: input tensor.
    filters: integer, number of filters of both convolutions in the block.
    kernel_size: default 3, kernel size of both convolutions. The explicit
      zero padding is always one pixel per side, independent of this value.
    stride: default 1, stride of the second convolution, and of the pooling
      applied to an identity shortcut when it is greater than 1.
    conv_shortcut: default False. If True the shortcut is a 1x1 convolution
      of the pre-activated input; otherwise it is the block input itself,
      subsampled by a size-1 max pooling when `stride > 1`.
    name: string, block label used as prefix of every layer name. Although
      the default is None, a string is required because layer-name suffixes
      are concatenated to it.

  Returns:
    Output tensor for the residual block.
  """
  keras_layers = tf.keras.layers
  channels_last = tf.keras.backend.image_data_format() == 'channels_last'
  bn_axis = 3 if channels_last else 1

  preact = keras_layers.BatchNormalization(
      axis=bn_axis, epsilon=1.001e-5, name=name + '_preact_bn')(x)
  preact = keras_layers.Activation('relu', name=name + '_preact_relu')(preact)

  # Shortcut branch: projection, subsampled identity, or plain identity.
  if conv_shortcut:
    shortcut = keras_layers.Conv2D(
        filters, 1, strides=1, name=name + '_0_conv')(preact)
  elif stride > 1:
    shortcut = keras_layers.MaxPooling2D(1, strides=stride)(x)
  else:
    shortcut = x

  # Main branch: two pad -> conv -> bn -> relu stages. Only the second stage
  # applies the block stride.
  branch = preact
  for stage, conv_stride in ((1, 1), (2, stride)):
    prefix = name + '_' + str(stage)
    branch = keras_layers.ZeroPadding2D(
        padding=((1, 1), (1, 1)), name=prefix + '_pad')(branch)
    branch = keras_layers.Conv2D(
        filters,
        kernel_size,
        strides=conv_stride,
        use_bias=False,
        name=prefix + '_conv')(branch)
    branch = keras_layers.BatchNormalization(
        axis=bn_axis, epsilon=1.001e-5, name=prefix + '_bn')(branch)
    branch = keras_layers.Activation('relu', name=prefix + '_relu')(branch)

  return keras_layers.Add(name=name + '_out')([shortcut, branch])
def stack_basic(x, filters, blocks, stride1=2, name=None):
  """Chains `blocks` basic residual blocks for ResNet18/34.

  The first block uses a convolution shortcut, the middle blocks use the
  default settings, and the last block receives the stride.

  Arguments:
    x: input tensor.
    filters: integer, number of filters used by every block in the stack.
    blocks: integer, number of blocks in the stack.
    stride1: default 2, stride passed to the last block of the stack.
    name: string, stack label. Blocks are named `<name>_block<i>` with `i`
      counted from 1.

  Returns:
    Output tensor for the stacked blocks.
  """
  block_prefix = name + '_block'
  out = block_basic(x, filters, conv_shortcut=True, name=block_prefix + '1')
  for block_index in range(2, blocks):
    out = block_basic(out, filters, name=block_prefix + str(block_index))
  return block_basic(
      out, filters, stride=stride1, name=block_prefix + str(blocks))
def resnet_v1_18(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet18 architecture.

  Four stacks of basic residual blocks (two blocks each, with 64, 128, 256
  and 512 filters) are handed to the Keras `resnet.ResNet` builder under the
  model name 'resnet18'. All arguments of this function are forwarded to
  `resnet.ResNet` unchanged.
  """
  # One entry per stack: (filters, blocks, stride1, name).
  stack_specs = (
      (64, 2, 1, 'conv2'),
      (128, 2, 2, 'conv3'),
      (256, 2, 2, 'conv4'),
      (512, 2, 2, 'conv5'),
  )

  def stack_fn(x):
    for filters, blocks, stride1, stack_name in stack_specs:
      x = stack_basic(x, filters, blocks, stride1=stride1, name=stack_name)
    return x

  return resnet.ResNet(
      stack_fn,
      preact=True,
      use_bias=True,
      model_name='resnet18',
      include_top=include_top,
      weights=weights,
      input_tensor=input_tensor,
      input_shape=input_shape,
      pooling=pooling,
      classes=classes,
      classifier_activation=classifier_activation)
def resnet_v1_34(include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 classifier_activation='softmax'):
  """Instantiates the ResNet34 architecture.

  Four stacks of basic residual blocks (3, 4, 6 and 3 blocks, with 64, 128,
  256 and 512 filters) are handed to the Keras `resnet.ResNet` builder under
  the model name 'resnet34'. All arguments of this function are forwarded to
  `resnet.ResNet` unchanged.
  """
  # One entry per stack: (filters, blocks, stride1, name).
  stack_specs = (
      (64, 3, 1, 'conv2'),
      (128, 4, 2, 'conv3'),
      (256, 6, 2, 'conv4'),
      (512, 3, 2, 'conv5'),
  )

  def stack_fn(x):
    for filters, blocks, stride1, stack_name in stack_specs:
      x = stack_basic(x, filters, blocks, stride1=stride1, name=stack_name)
    return x

  return resnet.ResNet(
      stack_fn,
      preact=True,
      use_bias=True,
      model_name='resnet34',
      include_top=include_top,
      weights=weights,
      input_tensor=input_tensor,
      input_shape=input_shape,
      pooling=pooling,
      classes=classes,
      classifier_activation=classifier_activation)
......@@ -20,12 +20,13 @@ object detection. To verify the consistency of the two models, we compare:
2. Number of global variables.
"""
import unittest
from absl.testing import parameterized
import numpy as np
from six.moves import zip
import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.builders import hyperparams_builder
from object_detection.models.keras_models import resnet_v1
from object_detection.protos import hyperparams_pb2
......@@ -180,5 +181,46 @@ class ResnetV1Test(test_case.TestCase):
self.assertEqual(len(variables), var_num)
class ResnetShapeTest(test_case.TestCase, parameterized.TestCase):
  """Checks the feature-map shapes produced by ResNet18 and ResNet34."""

  @unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
  @parameterized.parameters(
      {
          'resnet_type': 'resnet_v1_34',
          'output_layer_names': [
              'conv2_block3_out', 'conv3_block4_out', 'conv4_block6_out',
              'conv5_block3_out'
          ]
      }, {
          'resnet_type': 'resnet_v1_18',
          'output_layer_names': [
              'conv2_block2_out', 'conv3_block2_out', 'conv4_block2_out',
              'conv5_block2_out'
          ]
      })
  def test_output_shapes(self, resnet_type, output_layer_names):
    """The last block of each stack must have the expected output shape."""
    if resnet_type == 'resnet_v1_34':
      build_model = resnet_v1.resnet_v1_34
    else:
      build_model = resnet_v1.resnet_v1_18
    model = build_model(weights=None)

    endpoints = [
        model.get_layer(layer_name).output for layer_name in output_layer_names
    ]
    resnet_model = tf.keras.models.Model(inputs=model.input, outputs=endpoints)
    outputs = resnet_model(np.zeros((2, 64, 64, 3), dtype=np.float32))

    # Expected shapes of the conv2, conv3, conv4 and conv5 endpoints, in the
    # order given by `output_layer_names`.
    expected_shapes = (
        [2, 16, 16, 64],
        [2, 8, 8, 128],
        [2, 4, 4, 256],
        [2, 2, 2, 512],
    )
    for index, expected_shape in enumerate(expected_shapes):
      self.assertEqual(outputs[index].shape, expected_shape)
# Script entry point: run every test case defined in this module.
if __name__ == '__main__':
  tf.test.main()
......@@ -31,7 +31,7 @@ enum InputType {
TF_SEQUENCE_EXAMPLE = 2; // TfSequenceExample Input
}
// Next id: 32
// Next id: 33
message InputReader {
// Name of input reader. Typically used to describe the dataset that is read
// by this input reader.
......@@ -133,6 +133,10 @@ message InputReader {
// Whether input data type is tf.Examples or tf.SequenceExamples
optional InputType input_type = 30 [default = TF_EXAMPLE];
// Which frame to choose from the input if Sequence Example. -1 indicates
// random choice.
optional int32 frame_index = 32 [default = -1];
oneof input_reader {
TFRecordInputReader tf_record_input_reader = 8;
ExternalInputReader external_input_reader = 9;
......
......@@ -411,6 +411,56 @@ def multilevel_roi_align(features, boxes, box_levels, output_size,
return features_per_box
def multilevel_native_crop_and_resize(images, boxes, box_levels,
                                      crop_size, scope=None):
  """Multilevel native crop and resize.

  Same as `multilevel_matmul_crop_and_resize` but uses tf.image.crop_and_resize.

  Every box is cropped from every level with `native_crop_and_resize`; crops
  taken from a level other than the one assigned to the box are replaced by
  zeros, and the per-level results are summed.

  Args:
    images: A list of 4-D tensor of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow y1 > y2, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed; how the
      out-of-image area is filled is decided by `native_crop_and_resize`.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box. If None, all boxes are cropped from `images[0]` only.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    scope: A name for the operation (optional). Defaults to
      'MultiLevelNativeCropAndResize' for the enclosing name scope.

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  if box_levels is None:
    return native_crop_and_resize(images[0], boxes, crop_size, scope)
  # Honor the caller-provided scope (falling back to the default name), as
  # multilevel_matmul_crop_and_resize does.
  with tf.name_scope(scope, 'MultiLevelNativeCropAndResize'):
    cropped_feature_list = []
    for level, image in enumerate(images):
      # For each level, crop the feature according to all boxes
      # set the cropped feature not at this level to 0 tensor.
      # Consider more efficient way of computing cropped features.
      cropped = native_crop_and_resize(image, boxes, crop_size, scope)
      # Expand the per-box level mask to the full shape of `cropped`.
      # NOTE(review): `cropped.shape` is used as a static shape here, so this
      # presumably requires all of its dimensions to be known -- confirm.
      cond = tf.tile(
          tf.equal(box_levels, level)[:, :, tf.newaxis],
          [1, 1] + [tf.math.reduce_prod(cropped.shape.as_list()[2:])])
      cond = tf.reshape(cond, cropped.shape)
      cropped_final = tf.where(cond, cropped, tf.zeros_like(cropped))
      cropped_feature_list.append(cropped_final)
    # Exactly one level contributes a non-masked crop per matching box, so the
    # sum over levels selects it.
    return tf.math.reduce_sum(cropped_feature_list, axis=0)
def native_crop_and_resize(image, boxes, crop_size, scope=None):
"""Same as `matmul_crop_and_resize` but uses tf.image.crop_and_resize."""
def get_box_inds(proposals):
......@@ -431,6 +481,50 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape)
def multilevel_matmul_crop_and_resize(images, boxes, box_levels, crop_size,
                                      extrapolation_value=0.0, scope=None):
  """Multilevel matmul crop and resize.

  Same as `matmul_crop_and_resize` but crop images according to box levels.
  The work is delegated to `multilevel_roi_align` with `align_corners=True`.

  Args:
    images: A list of 4-D tensor of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32` or 'bfloat16'.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      We do allow y1 > y2, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case we use `extrapolation_value` to extrapolate the input image values.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box. If None, every box is assigned to level 0.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    extrapolation_value: A float value to use for extrapolation.
    scope: A name for the operation (optional).

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  with tf.name_scope(scope, 'MultiLevelMatMulCropAndResize'):
    levels = box_levels
    if levels is None:
      # No level assignment given: use an int32 zero for every box.
      batch_and_num_boxes = tf.shape(boxes)[:2]
      levels = tf.zeros(batch_and_num_boxes, dtype=tf.int32)
    roi_align_options = {
        'align_corners': True,
        'extrapolation_value': extrapolation_value,
    }
    return multilevel_roi_align(
        images, boxes, levels, crop_size, **roi_align_options)
def matmul_crop_and_resize(image, boxes, crop_size, extrapolation_value=0.0,
scope=None):
"""Matrix multiplication based implementation of the crop and resize op.
......
......@@ -512,6 +512,38 @@ class MatMulCropAndResizeTest(test_case.TestCase):
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMultilevelMatMulCropAndResize(self):
  """Crops boxes from two 3x3 feature levels and checks the 2x2 crops."""

  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_matmul_crop_and_resize(
        [image1, image2], boxes, box_levels, crop_size=[2, 2])

  # Channel 0 of both levels is the 3x3 grid 1..9. Channel 1 is all zeros on
  # level 0 and the grid 0..8 on level 1. Both batch entries are identical.
  grid = np.arange(1, 10, dtype=np.float32).reshape(3, 3)
  level0_image = np.stack([grid, np.zeros_like(grid)], axis=-1)
  level1_image = np.stack([grid, grid - 1], axis=-1)
  image = [np.stack([level0_image] * 2), np.stack([level1_image] * 2)]

  boxes = np.array(
      [
          [[1, 1, 0, 0], [.5, .5, 0, 0]],
          [[0, 0, 1, 1], [0, 0, .5, .5]],
      ],
      dtype=np.float32)
  box_levels = np.array([[0, 1], [1, 1]], dtype=np.int32)

  expected_output = [
      [
          [[[9, 0], [7, 0]], [[3, 0], [1, 0]]],
          [[[5, 4], [4, 3]], [[2, 1], [1, 0]]],
      ],
      [
          [[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
          [[[1, 0], [2, 1]], [[4, 3], [5, 4]]],
      ],
  ]
  crop_output = self.execute(graph_fn, image + [boxes, box_levels])
  self.assertAllClose(crop_output, expected_output)
class NativeCropAndResizeTest(test_case.TestCase):
......@@ -537,6 +569,35 @@ class NativeCropAndResizeTest(test_case.TestCase):
crop_output = self.execute_cpu(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMultilevelBatchCropAndResize3x3To2x2_2Channels(self):
  """Crops boxes from a 3x3 and a 2x2 feature level and checks the crops."""

  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_native_crop_and_resize(
        [image1, image2], boxes, box_levels, crop_size=[2, 2])

  # Level 0 is a 3x3 image whose channels hold the grids 1..9 and 0..8.
  # Level 1 is the top-left 2x2 corner of that image. Both batch entries are
  # identical.
  grid = np.arange(1, 10, dtype=np.float32).reshape(3, 3)
  level0_image = np.stack([grid, grid - 1], axis=-1)
  level1_image = level0_image[:2, :2]
  image = [np.stack([level0_image] * 2), np.stack([level1_image] * 2)]

  boxes = np.array(
      [
          [[0, 0, 1, 1], [0, 0, .5, .5]],
          [[1, 1, 0, 0], [.5, .5, 0, 0]],
      ],
      dtype=np.float32)
  box_levels = np.array([[0, 1], [0, 0]], dtype=np.float32)

  expected_output = [
      [
          [[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
          [[[1, 0], [1.5, 0.5]], [[2.5, 1.5], [3, 2]]],
      ],
      [
          [[[9, 8], [7, 6]], [[3, 2], [1, 0]]],
          [[[5, 4], [4, 3]], [[2, 1], [1, 0]]],
      ],
  ]
  crop_output = self.execute_cpu(graph_fn, image + [boxes, box_levels])
  self.assertAllClose(crop_output, expected_output)
# Script entry point: run every test case defined in this module.
if __name__ == '__main__':
  tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment