Unverified Commit 3f78f4cf authored by derekjchow, committed by GitHub

Merge pull request #3494 from pkulzc/master

Update object detection with internal changes and remove unused BUILD files.
parents 73748d01 0319908c
......@@ -38,11 +38,17 @@ message WeightedL2LocalizationLoss {
optional bool anchorwise_output = 1 [default=false];
}
// SmoothL1 (Huber) location loss: .5 * x ^ 2 if |x| < 1 else |x| - .5
// SmoothL1 (Huber) location loss.
// The smooth L1 loss is defined elementwise as .5 x^2 if |x| <= delta and
// 0.5 delta^2 + delta * (|x| - delta) otherwise, where x is the difference
// between predictions and target.
message WeightedSmoothL1LocalizationLoss {
// DEPRECATED, do not use.
// Output loss per anchor.
optional bool anchorwise_output = 1 [default=false];
// Delta value for huber loss.
optional float delta = 2 [default=1.0];
}
// Intersection over union location loss: 1 - IOU
......
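As a quick numeric illustration of the piecewise definition above, here is a minimal NumPy sketch (not part of the diff; the helper name smooth_l1 is ours):

import numpy as np

def smooth_l1(x, delta=1.0):
  """Elementwise smooth L1 (Huber) loss as defined in losses.proto above."""
  abs_x = np.abs(x)
  return np.where(abs_x <= delta,
                  0.5 * x**2,
                  0.5 * delta**2 + delta * (abs_x - delta))

# With the default delta=1.0: smooth_l1(0.5) == 0.125 and smooth_l1(2.0) == 1.5.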
......@@ -20,4 +20,7 @@ message MultiscaleAnchorGenerator {
// Number of intermediate scales per scale octave.
optional int32 scales_per_octave = 5 [default = 2];
// Whether to produce anchors in normalized coordinates.
optional bool normalize_coordinates = 6 [default = true];
}
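For orientation (an assumption on our part, not stated in this hunk): scales_per_octave expands into fractional-power-of-two multipliers in the usual FPN-style scheme, e.g.:

scales_per_octave = 2
scale_multipliers = [2**(i / float(scales_per_octave))
                     for i in range(scales_per_octave)]
# scale_multipliers == [1.0, 1.4142...], applied on top of each base anchor scale.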
......@@ -388,14 +388,6 @@ message SSDRandomCropPadFixedAspectRatioOperation {
// Probability a crop operation is skipped.
optional float random_coef = 7;
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 8;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 9;
}
// Randomly crops and pads an image to a fixed aspect ratio according to:
......@@ -408,4 +400,12 @@ message SSDRandomCropPadFixedAspectRatio {
// Aspect ratio to pad to. This value is used for all crop and pad operations.
optional float aspect_ratio = 2 [default=1.0];
// Min ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float min_padded_size_ratio = 3;
// Max ratio of padded image height and width to the input image's height and
// width. Two entries per operation.
repeated float max_padded_size_ratio = 4;
}
......@@ -36,6 +36,10 @@ message Ssd {
// zeros vector or a one-hot vector (where background is the 0th class).
optional bool encode_background_as_zeros = 12 [default=false];
// Classification weight to be associated with negative
// anchors (default: 1.0). The weight must be in [0., 1.].
optional float negative_class_weight = 13 [default = 1.0];
// Box predictor to attach to the features.
optional BoxPredictor box_predictor = 7;
......@@ -49,6 +53,10 @@ message Ssd {
// the anchors.
optional bool normalize_loss_by_num_matches = 10 [default=true];
// Whether to normalize the localization loss by the code size of the box
// encodings. This is applied along with other normalization factors.
optional bool normalize_loc_loss_by_codesize = 14 [default=false];
// Loss configuration for training.
optional Loss loss = 11;
}
......
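A hedged sketch of setting the two new Ssd fields through the generated Python protos (assuming the object_detection.protos.ssd_pb2 module generated from ssd.proto):

from object_detection.protos import ssd_pb2

ssd_config = ssd_pb2.Ssd()
# Down-weight the classification loss on negative (background) anchors.
ssd_config.negative_class_weight = 0.5
# Also divide the localization loss by the box coder's code size.
ssd_config.normalize_loc_loss_by_codesize = True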
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"faster_rcnn_resnet50_pets.config",
"ssd_inception_v2_pets.config",
"ssd_mobilenet_v1_focal_loss_pets.config",
])
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"pets_examples.record",
])
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
exports_files([
"image1.jpg",
"image2.jpg",
])
......@@ -235,7 +235,7 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
train_config.prefetch_queue_capacity, data_augmentation_options)
# Gather initial summaries.
# TODO: See if summaries can be added/extracted from global tf
# TODO(rathodv): See if summaries can be added/extracted from global tf
# collections so that they don't have to be passed around.
summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
global_summaries = set([])
......@@ -258,17 +258,19 @@ def train(create_tensor_dict_fn, create_model_fn, train_config, master, task,
sync_optimizer = None
if train_config.sync_replicas:
training_optimizer = tf.SyncReplicasOptimizer(
training_optimizer = tf.train.SyncReplicasOptimizer(
training_optimizer,
replicas_to_aggregate=train_config.replicas_to_aggregate,
total_num_replicas=train_config.worker_replicas)
total_num_replicas=worker_replicas)
sync_optimizer = training_optimizer
# Create ops required to initialize the model from a given checkpoint.
init_fn = None
if train_config.fine_tune_checkpoint:
var_map = detection_model.restore_map(
from_detection_checkpoint=train_config.from_detection_checkpoint)
from_detection_checkpoint=train_config.from_detection_checkpoint,
load_all_detection_checkpoint_vars=(
train_config.load_all_detection_checkpoint_vars))
available_var_map = (variables_helper.
get_variables_available_in_checkpoint(
var_map, train_config.fine_tune_checkpoint))
......
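For reference, a hedged sketch of the train_config fields exercised by the restore_map call above (field names taken from this diff and train.proto; the checkpoint path is a placeholder):

from object_detection.protos import train_pb2

train_config = train_pb2.TrainConfig()
train_config.fine_tune_checkpoint = '/path/to/model.ckpt'  # placeholder path
train_config.from_detection_checkpoint = True
# New in this change: also restore detection variables outside the
# feature extractor when initializing from a detection checkpoint.
train_config.load_all_detection_checkpoint_vars = True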
# Tensorflow Object Detection API: Utility functions.
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"])
# Apache 2.0
py_library(
name = "test_case",
srcs = ["test_case.py"],
deps = ["//tensorflow"],
)
py_library(
name = "category_util",
srcs = ["category_util.py"],
deps = ["//tensorflow"],
)
py_library(
name = "config_util",
srcs = ["config_util.py"],
deps = [
"//pyglib/logging",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:eval_py_pb2",
"//tensorflow/models/research/object_detection/protos:image_resizer_py_pb2",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
py_library(
name = "dataset_util",
srcs = ["dataset_util.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "json_utils",
srcs = ["json_utils.py"],
deps = [],
)
py_test(
name = "json_utils_test",
srcs = ["json_utils_test.py"],
deps = [
":json_utils",
"//tensorflow",
],
)
py_library(
name = "label_map_util",
srcs = ["label_map_util.py"],
deps = [
"//google/protobuf",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:string_int_label_map_py_pb2",
],
)
py_library(
name = "learning_schedules",
srcs = ["learning_schedules.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "metrics",
srcs = ["metrics.py"],
deps = ["//numpy"],
)
py_library(
name = "np_box_list",
srcs = ["np_box_list.py"],
deps = ["//numpy"],
)
py_library(
name = "np_box_mask_list",
srcs = ["np_box_mask_list.py"],
deps = [
":np_box_list",
"//numpy",
],
)
py_library(
name = "np_box_list_ops",
srcs = ["np_box_list_ops.py"],
deps = [
":np_box_list",
":np_box_ops",
"//numpy",
],
)
py_library(
name = "np_box_mask_list_ops",
srcs = ["np_box_mask_list_ops.py"],
deps = [
":np_box_list_ops",
":np_box_mask_list",
":np_mask_ops",
"//numpy",
],
)
py_library(
name = "np_box_ops",
srcs = ["np_box_ops.py"],
deps = ["//tensorflow"],
)
py_library(
name = "np_mask_ops",
srcs = ["np_mask_ops.py"],
deps = ["//numpy"],
)
py_library(
name = "object_detection_evaluation",
srcs = ["object_detection_evaluation.py"],
deps = [
":label_map_util",
":metrics",
":per_image_evaluation",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_library(
name = "ops",
srcs = ["ops.py"],
deps = [
":shape_utils",
":static_shape",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_list_ops",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_library(
name = "per_image_evaluation",
srcs = ["per_image_evaluation.py"],
deps = [
":np_box_list",
":np_box_list_ops",
":np_box_mask_list",
":np_box_mask_list_ops",
"//tensorflow",
],
)
py_library(
name = "shape_utils",
srcs = ["shape_utils.py"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_library(
name = "static_shape",
srcs = ["static_shape.py"],
deps = [],
)
py_library(
name = "test_utils",
srcs = ["test_utils.py"],
deps = [
"//tensorflow",
"//tensorflow/models/research/object_detection/core:anchor_generator",
"//tensorflow/models/research/object_detection/core:box_coder",
"//tensorflow/models/research/object_detection/core:box_list",
"//tensorflow/models/research/object_detection/core:box_predictor",
"//tensorflow/models/research/object_detection/core:matcher",
"//tensorflow/models/research/object_detection/utils:shape_utils",
],
)
py_library(
name = "variables_helper",
srcs = ["variables_helper.py"],
deps = [
"//tensorflow",
],
)
py_library(
name = "visualization_utils",
srcs = ["visualization_utils.py"],
deps = [
"//PIL:pil",
"//Tkinter", # buildcleaner: keep
"//matplotlib",
"//six",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "category_util_test",
srcs = ["category_util_test.py"],
deps = [
":category_util",
"//tensorflow",
],
)
py_test(
name = "config_util_test",
srcs = ["config_util_test.py"],
deps = [
":config_util",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:image_resizer_py_pb2",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
"//tensorflow/models/research/object_detection/protos:model_py_pb2",
"//tensorflow/models/research/object_detection/protos:pipeline_py_pb2",
"//tensorflow/models/research/object_detection/protos:train_py_pb2",
],
)
py_test(
name = "dataset_util_test",
srcs = ["dataset_util_test.py"],
deps = [
":dataset_util",
"//tensorflow",
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2",
],
)
py_test(
name = "label_map_util_test",
srcs = ["label_map_util_test.py"],
deps = [
":label_map_util",
"//tensorflow",
],
)
py_test(
name = "learning_schedules_test",
srcs = ["learning_schedules_test.py"],
deps = [
":learning_schedules",
":test_case",
"//tensorflow",
],
)
py_test(
name = "metrics_test",
srcs = ["metrics_test.py"],
deps = [
":metrics",
"//tensorflow",
],
)
py_test(
name = "np_box_list_test",
srcs = ["np_box_list_test.py"],
deps = [
":np_box_list",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_test",
srcs = ["np_box_mask_list_test.py"],
deps = [
":np_box_mask_list",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_list_ops_test",
srcs = ["np_box_list_ops_test.py"],
deps = [
":np_box_list",
":np_box_list_ops",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_mask_list_ops_test",
srcs = ["np_box_mask_list_ops_test.py"],
deps = [
":np_box_mask_list",
":np_box_mask_list_ops",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "np_box_ops_test",
srcs = ["np_box_ops_test.py"],
deps = [
":np_box_ops",
"//tensorflow",
],
)
py_test(
name = "np_mask_ops_test",
srcs = ["np_mask_ops_test.py"],
deps = [
":np_mask_ops",
"//tensorflow",
],
)
py_test(
name = "object_detection_evaluation_test",
srcs = ["object_detection_evaluation_test.py"],
deps = [
":object_detection_evaluation",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "ops_test",
srcs = ["ops_test.py"],
deps = [
":ops",
":test_case",
"//tensorflow",
"//tensorflow/models/research/object_detection/core:standard_fields",
],
)
py_test(
name = "per_image_evaluation_test",
srcs = ["per_image_evaluation_test.py"],
deps = [
":per_image_evaluation",
"//tensorflow",
],
)
py_test(
name = "shape_utils_test",
srcs = ["shape_utils_test.py"],
deps = [
":shape_utils",
"//numpy",
"//tensorflow",
],
)
py_test(
name = "static_shape_test",
srcs = ["static_shape_test.py"],
deps = [
":static_shape",
"//tensorflow",
],
)
py_test(
name = "test_utils_test",
srcs = ["test_utils_test.py"],
deps = [
":test_utils",
"//tensorflow",
],
)
py_test(
name = "variables_helper_test",
srcs = ["variables_helper_test.py"],
deps = [
":variables_helper",
"//tensorflow",
],
)
py_test(
name = "visualization_utils_test",
srcs = ["visualization_utils_test.py"],
data = [
"//tensorflow/models/research/object_detection/test_images:image1.jpg",
],
deps = [
":visualization_utils",
"//pyglib/flags",
"//PIL:pil",
],
)
......@@ -241,6 +241,10 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
if hparams:
kwargs.update(hparams.values())
for key, value in kwargs.items():
# pylint: disable=g-explicit-bool-comparison
if value == "" or value is None:
continue
# pylint: enable=g-explicit-bool-comparison
if key == "learning_rate":
_update_initial_learning_rate(configs, value)
tf.logging.info("Overwriting learning rate: %f", value)
......@@ -270,9 +274,8 @@ def merge_external_params_with_configs(configs, hparams=None, **kwargs):
_update_input_path(configs["eval_input_config"], value)
tf.logging.info("Overwriting eval input path: %s", value)
if key == "label_map_path":
if value:
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
_update_label_map_path(configs, value)
tf.logging.info("Overwriting label map path: %s", value)
if key == "mask_type":
_update_mask_type(configs, value)
tf.logging.info("Overwritten mask type: %s", value)
......
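A hedged usage sketch of the behavior this hunk introduces (paths are placeholders): empty-string overrides are now skipped generically at the top of the loop, so callers can pass unset hyperparameters without clobbering the pipeline config:

from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file('pipeline.config')
configs = config_util.merge_external_params_with_configs(
    configs,
    learning_rate=0.001,  # applied: overwrites the initial learning rate
    label_map_path='')    # empty string: skipped, original path is kept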
......@@ -397,6 +397,27 @@ class ConfigUtilTest(tf.test.TestCase):
self.assertEqual(new_label_map_path,
configs["eval_input_config"].label_map_path)
def testDontOverwriteEmptyLabelMapPath(self):
"""Tests that label map path will not by overwritten with empty string."""
original_label_map_path = "path/to/original/label_map"
new_label_map_path = ""
pipeline_config_path = os.path.join(self.get_temp_dir(), "pipeline.config")
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
train_input_reader = pipeline_config.train_input_reader
train_input_reader.label_map_path = original_label_map_path
eval_input_reader = pipeline_config.eval_input_reader
eval_input_reader.label_map_path = original_label_map_path
_write_config(pipeline_config, pipeline_config_path)
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
configs = config_util.merge_external_params_with_configs(
configs, label_map_path=new_label_map_path)
self.assertEqual(original_label_map_path,
configs["train_input_config"].label_map_path)
self.assertEqual(original_label_map_path,
configs["eval_input_config"].label_map_path)
def testNewMaskType(self):
"""Tests that mask type can be overwritten in input readers."""
original_mask_type = input_reader_pb2.NUMERICAL_MASKS
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for dealing with writing json strings.
json_utils wraps json.dump and json.dumps so that they can be used to safely
......
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.image.understanding.object_detection.utils.json_utils."""
import os
......
......@@ -142,6 +142,7 @@ def manual_stepping(global_step, boundaries, rates):
if len(rates) != len(boundaries) + 1:
raise ValueError('Number of provided learning rates must exceed '
'number of boundary points by exactly 1.')
if not boundaries:
  return tf.constant(rates[0])
step_boundaries = tf.constant(boundaries, tf.int32)
num_boundaries = len(boundaries)
learning_rates = tf.constant(rates, tf.float32)
......
......@@ -75,5 +75,21 @@ class LearningSchedulesTest(test_case.TestCase):
exp_rates = [1.0, 1.0, 2.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
self.assertAllClose(output_rates, exp_rates)
def testManualSteppingWithZeroBoundaries(self):
def graph_fn(global_step):
boundaries = []
rates = [0.01]
learning_rate = learning_schedules.manual_stepping(
global_step, boundaries, rates)
return (learning_rate,)
output_rates = [
self.execute(graph_fn, [np.array(i).astype(np.int64)])
for i in range(4)
]
exp_rates = [0.01] * 4
self.assertAllClose(output_rates, exp_rates)
if __name__ == '__main__':
tf.test.main()
......@@ -19,7 +19,6 @@ Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list
......
......@@ -19,7 +19,6 @@ Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import numpy as np
from object_detection.utils import np_box_list_ops
......
......@@ -224,7 +224,7 @@ def padded_one_hot_encoding(indices, depth, left_pad):
ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth` are
either negative or non-integers.
TODO: add runtime checks for depth and indices.
TODO(rathodv): add runtime checks for depth and indices.
"""
if depth < 0 or not isinstance(depth, six.integer_types):
raise ValueError('`depth` must be a non-negative integer.')
......@@ -474,7 +474,7 @@ def normalize_to_target(inputs,
Note that the rank of `inputs` must be known and the dimension to which
normalization is to be applied should be statically defined.
TODO: Add option to scale by L2 norm of the entire input.
TODO(jonathanhuang): Add option to scale by L2 norm of the entire input.
Args:
inputs: A `Tensor` of arbitrary size.
......@@ -704,7 +704,7 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
Returns:
A tf.float32 tensor of size [num_masks, image_height, image_width].
"""
# TODO: Make this a public function.
# TODO(rathodv): Make this a public function.
def transform_boxes_relative_to_boxes(boxes, reference_boxes):
boxes = tf.reshape(boxes, [-1, 2, 2])
min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
......@@ -820,3 +820,127 @@ def matmul_gather_on_zeroth_axis(params, indices, scope=None):
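# The one-hot indicator matrix turns the gather into a matrix product:
# row i of indicator_matrix selects params2d[indices[i], :].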
gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
return tf.reshape(gathered_result_flattened,
tf.stack(indices_shape + params_shape[1:]))
def matmul_crop_and_resize(image, boxes, crop_size, scope=None):
"""Matrix multiplication based implementation of the crop and resize op.
Extracts crops from the input image tensor and bilinearly resizes them
(possibly with aspect ratio change) to a common output size specified by
crop_size. This is more general than the crop_to_bounding_box op which
extracts a fixed size slice from the input image and does not allow
resizing or aspect ratio change.
Returns a tensor with crops from the input image at positions defined at
the bounding box locations in boxes. The cropped boxes are all resized
(with bilinear interpolation) to a fixed size = `[crop_height, crop_width]`.
The result is a 4-D tensor `[num_boxes, crop_height, crop_width, depth]`.
Running time complexity:
O((# channels) * (# boxes) * (crop_size)^2 * M), where M is the number
of pixels of the longer edge of the image.
Note that this operation is meant to replicate the behavior of the standard
tf.image.crop_and_resize operation but there are a few differences.
Specifically:
1) The extrapolation value (the values that are interpolated from outside
the bounds of the image window) is always zero
2) Only XLA supported operations are used (e.g., matrix multiplication).
3) There is no `box_indices` argument --- to run this op on multiple images,
one must currently call this op independently on each image.
4) All shapes and the `crop_size` parameter are assumed to be statically
defined. Moreover, the number of boxes must be strictly nonzero.
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
`y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
height is mapped to `[0, image_height - 1]` in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case the input image values are extrapolated with zeros (see difference 1
above).
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
scope: A name for the operation (optional).
Returns:
A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`
Raises:
ValueError: if image tensor does not have shape
`[1, image_height, image_width, depth]` and all dimensions statically
defined.
ValueError: if boxes tensor does not have shape `[num_boxes, 4]` where
num_boxes > 0.
ValueError: if crop_size is not a list of two positive integers
"""
img_shape = image.shape.as_list()
boxes_shape = boxes.shape.as_list()
_, img_height, img_width, _ = img_shape
if not isinstance(crop_size, list) or len(crop_size) != 2:
raise ValueError('`crop_size` must be a list of length 2')
dimensions = img_shape + crop_size + boxes_shape
if not all([isinstance(dim, int) for dim in dimensions]):
raise ValueError('all input shapes must be statically defined')
if len(boxes_shape) != 2 or boxes_shape[1] != 4:
  raise ValueError('`boxes` should have shape `[num_boxes, 4]`')
if len(img_shape) != 4 or img_shape[0] != 1:
  raise ValueError('image should have shape '
                   '`[1, image_height, image_width, depth]`')
num_crops = boxes_shape[0]
if not num_crops > 0:
raise ValueError('number of boxes must be > 0')
if not (crop_size[0] > 0 and crop_size[1] > 0):
raise ValueError('`crop_size` must be a list of two positive integers.')
def _lin_space_weights(num, img_size):
if num > 1:
alpha = (img_size - 1) / float(num - 1)
indices = np.reshape(np.arange(num), (1, num))
start_weights = alpha * (num - 1 - indices)
stop_weights = alpha * indices
else:
start_weights = num * [.5 * (img_size - 1)]
stop_weights = num * [.5 * (img_size - 1)]
return (tf.constant(start_weights, dtype=tf.float32),
tf.constant(stop_weights, dtype=tf.float32))
with tf.name_scope(scope, 'MatMulCropAndResize'):
y1_weights, y2_weights = _lin_space_weights(crop_size[0], img_height)
x1_weights, x2_weights = _lin_space_weights(crop_size[1], img_width)
[y1, x1, y2, x2] = tf.split(value=boxes, num_or_size_splits=4, axis=1)
# Pixel centers of input image and grid points along height and width
image_idx_h = tf.constant(
np.reshape(np.arange(img_height), (1, 1, img_height)), dtype=tf.float32)
image_idx_w = tf.constant(
np.reshape(np.arange(img_width), (1, 1, img_width)), dtype=tf.float32)
grid_pos_h = tf.expand_dims(y1 * y1_weights + y2 * y2_weights, 2)
grid_pos_w = tf.expand_dims(x1 * x1_weights + x2 * x2_weights, 2)
# Create kernel matrices of pairwise kernel evaluations between pixel
# centers of image and grid points.
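# relu(1 - |p - g|) is the bilinear 'tent' kernel: each pixel center p gets
# weight 1 - |p - g| when it lies within one pixel of grid point g and zero
# otherwise, so each row of the matmuls below performs linear interpolation
# between the two nearest pixel centers.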
kernel_h = tf.nn.relu(1 - tf.abs(image_idx_h - grid_pos_h))
kernel_w = tf.nn.relu(1 - tf.abs(image_idx_w - grid_pos_w))
# TODO(jonathanhuang): investigate whether all channels can be processed
# without the explicit unstack --- possibly with a permute and map_fn call.
result_channels = []
for channel in tf.unstack(image, axis=3):
result_channels.append(
tf.matmul(
tf.matmul(kernel_h, tf.tile(channel, [num_crops, 1, 1])),
kernel_w, transpose_b=True))
return tf.stack(result_channels, axis=3)
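A hedged sanity-check sketch (TF1-style session; not from the diff): for an in-bounds box on a single image the result should match the standard tf.image.crop_and_resize, with box_ind pinned to image 0 per difference (3) above:

import numpy as np
import tensorflow as tf
from object_detection.utils import ops

image = tf.constant(np.random.rand(1, 8, 8, 3), dtype=tf.float32)
boxes = tf.constant([[0.1, 0.2, 0.7, 0.9]], dtype=tf.float32)
matmul_crops = ops.matmul_crop_and_resize(image, boxes, crop_size=[4, 4])
reference_crops = tf.image.crop_and_resize(
    image, boxes, box_ind=tf.zeros([1], dtype=tf.int32), crop_size=[4, 4])
with tf.Session() as sess:
  ours, reference = sess.run([matmul_crops, reference_crops])
np.testing.assert_allclose(ours, reference, atol=1e-5)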
......@@ -1171,12 +1171,15 @@ class NearestNeighborUpsamplingTest(test_case.TestCase):
def graph_fn(inputs):
custom_op_output = ops.nearest_neighbor_upsampling(inputs, scale=2)
tf_op_output = tf.image.resize_images(
inputs, [4, 4], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
return (custom_op_output, tf_op_output)
inputs = np.reshape(np.arange(2**4), [2, 2, 2, 2])
(custom_op_output, tf_op_output) = self.execute(graph_fn, [inputs])
self.assertAllClose(custom_op_output, tf_op_output)
return custom_op_output
inputs = np.reshape(np.arange(4).astype(np.float32), [1, 2, 2, 1])
custom_op_output = self.execute(graph_fn, [inputs])
expected_output = [[[[0], [0], [1], [1]],
[[0], [0], [1], [1]],
[[2], [2], [3], [3]],
[[2], [2], [3], [3]]]]
self.assertAllClose(custom_op_output, expected_output)
class MatmulGatherOnZerothAxis(test_case.TestCase):
......@@ -1190,7 +1193,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([2, 2, 1])
indices = np.array([2, 2, 1], dtype=np.int32)
expected_output = np.array([[9, 10, 11, 12], [9, 10, 11, 12], [5, 6, 7, 8]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1204,7 +1207,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[[5, 6], [7, 8]],
[[9, 10], [11, 12]],
[[0, 1], [0, 0]]], dtype=np.float32)
indices = np.array([0, 3, 1])
indices = np.array([0, 3, 1], dtype=np.int32)
expected_output = np.array([[[1, 2], [3, 4]],
[[0, 1], [0, 0]],
[[5, 6], [7, 8]]])
......@@ -1220,7 +1223,7 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
[5, 6, 7, 8],
[9, 10, 11, 12],
[0, 1, 0, 0]], dtype=np.float32)
indices = np.array([0, 0, 0, 0, 0, 0])
indices = np.array([0, 0, 0, 0, 0, 0], dtype=np.int32)
expected_output = np.array(6*[[1, 2, 3, 4]])
gather_output = self.execute(graph_fn, [params, indices])
self.assertAllClose(gather_output, expected_output)
......@@ -1241,5 +1244,109 @@ class MatmulGatherOnZerothAxis(test_case.TestCase):
params_placeholder: params, indices_placeholder: indices})
self.assertAllClose(gather_output, expected_output)
class OpsTestMatMulCropAndResize(test_case.TestCase):
def testMatMulCropAndResize2x2To1x1(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
expected_output = [[[[2.5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To1x1Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[1, 1])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
expected_output = [[[[2.5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To3x3(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1]], dtype=np.float32)
expected_output = [[[[1.0], [1.5], [2.0]],
[[2.0], [2.5], [3.0]],
[[3.0], [3.5], [4.0]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize2x2To3x3Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[3, 3])
image = np.array([[[[1], [2]], [[3], [4]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0]], dtype=np.float32)
expected_output = [[[[4.0], [3.5], [3.0]],
[[3.0], [2.5], [2.0]],
[[2.0], [1.5], [1.0]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1], [2], [3]],
[[4], [5], [6]],
[[7], [8], [9]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5]], dtype=np.float32)
expected_output = [[[[1], [3]], [[7], [9]]],
[[[1], [2]], [[4], [5]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2MultiChannel(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1, 0], [2, 1], [3, 2]],
[[4, 3], [5, 4], [6, 5]],
[[7, 6], [8, 7], [9, 8]]]], dtype=np.float32)
boxes = np.array([[0, 0, 1, 1],
[0, 0, .5, .5]], dtype=np.float32)
expected_output = [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
[[[1, 0], [2, 1]], [[4, 3], [5, 4]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMatMulCropAndResize3x3To2x2Flipped(self):
def graph_fn(image, boxes):
return ops.matmul_crop_and_resize(image, boxes, crop_size=[2, 2])
image = np.array([[[[1], [2], [3]],
[[4], [5], [6]],
[[7], [8], [9]]]], dtype=np.float32)
boxes = np.array([[1, 1, 0, 0],
[.5, .5, 0, 0]], dtype=np.float32)
expected_output = [[[[9], [7]], [[3], [1]]],
[[[5], [4]], [[2], [1]]]]
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testInvalidInputShape(self):
image = tf.constant([[[1], [2]], [[3], [4]]], dtype=tf.float32)
boxes = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
crop_size = [4, 4]
with self.assertRaises(ValueError):
_ = ops.matmul_crop_and_resize(image, boxes, crop_size)
if __name__ == '__main__':
tf.test.main()
......@@ -152,7 +152,7 @@ def static_or_dynamic_map_fn(fn, elems, dtype=None,
Tensors or lists of Tensors). Likewise, the output of `fn` can only be a
Tensor or list of Tensors.
TODO: make this function fully interchangeable with tf.map_fn.
TODO(jonathanhuang): make this function fully interchangeable with tf.map_fn.
Args:
fn: The callable to be performed. It accepts one argument, which will have
......