Commit 31ca3b97 authored by Kaushik Shivakumar's avatar Kaushik Shivakumar
Browse files

resolve merge conflicts

parents 3e9d886d 7fcd7cba
......@@ -26,7 +26,8 @@ from object_detection.utils import shape_utils
def create_conv_block(name, num_filters, kernel_size, strides, padding,
use_separable, apply_batchnorm, apply_activation,
conv_hyperparams, is_training, freeze_batchnorm):
conv_hyperparams, is_training, freeze_batchnorm,
conv_bn_act_pattern=True):
"""Create Keras layers for regular or separable convolutions.
Args:
......@@ -50,6 +51,9 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
conv_bn_act_pattern: Bool. By default, when True, the layers returned by
this function are in the order [conv, batchnorm, activation]. Otherwise,
when False, the order of the layers is [activation, conv, batchnorm].
Returns:
A list of keras layers, including (regular or separable) convolution, and
......@@ -73,7 +77,7 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
depth_multiplier=1,
padding=padding,
strides=strides,
name=name + '_separable_conv',
name=name + 'separable_conv',
**kwargs))
else:
layers.append(
......@@ -82,18 +86,22 @@ def create_conv_block(name, num_filters, kernel_size, strides, padding,
kernel_size=kernel_size,
padding=padding,
strides=strides,
name=name + '_conv',
name=name + 'conv',
**conv_hyperparams.params()))
if apply_batchnorm:
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_batchnorm'))
name=name + 'batchnorm'))
if apply_activation:
layers.append(
conv_hyperparams.build_activation_layer(name=name + '_activation'))
activation_layer = conv_hyperparams.build_activation_layer(
name=name + 'activation')
if conv_bn_act_pattern:
layers.append(activation_layer)
else:
layers = [activation_layer] + layers
return layers
......@@ -133,28 +141,28 @@ def create_downsample_feature_map_ops(scale, downsample_method,
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_max_x{}'.format(stride)))
name=name + 'downsample_max_x{}'.format(stride)))
elif downsample_method == 'avg_pooling':
layers.append(
tf.keras.layers.AveragePooling2D(
pool_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_avg_x{}'.format(stride)))
name=name + 'downsample_avg_x{}'.format(stride)))
elif downsample_method == 'depthwise_conv':
layers.append(
tf.keras.layers.DepthwiseConv2D(
kernel_size=kernel_size,
strides=stride,
padding=padding,
name=name + '_downsample_depthwise_x{}'.format(stride)))
name=name + 'downsample_depthwise_x{}'.format(stride)))
layers.append(
conv_hyperparams.build_batch_norm(
training=(is_training and not freeze_batchnorm),
name=name + '_downsample_batchnorm'))
name=name + 'downsample_batchnorm'))
layers.append(
conv_hyperparams.build_activation_layer(name=name +
'_downsample_activation'))
'downsample_activation'))
else:
raise ValueError('Unknown downsample method: {}'.format(downsample_method))
......
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for colab tutorials located in object_detection/colab_tutorials/..."""
import base64
import io
import json
from typing import Dict
from typing import List
from typing import Union
import uuid
from IPython.display import display
from IPython.display import Javascript
import numpy as np
from PIL import Image
from google.colab import output
from google.colab.output import eval_js
def image_from_numpy(image):
  """Encode an in-memory image as a Base64 JPEG string.

  Args:
    image: np.ndarray
      Image represented as a numpy array. It is passed to
      `PIL.Image.fromarray` and written out as JPEG.
      NOTE(review): presumably this must be a shape/dtype Pillow can save as
      JPEG (e.g. uint8 RGB or grayscale) -- confirm against callers.

  Returns:
    A `str` containing the Base64 encoding of the JPEG bytes.
  """
  with io.BytesIO() as img_output:
    Image.fromarray(image).save(img_output, format='JPEG')
    data = img_output.getvalue()
  # b64encode returns ASCII-only bytes; decode them explicitly instead of
  # slicing the "b'...'" repr of the bytes object.
  return base64.b64encode(data).decode('ascii')
def draw_bbox(image_urls, callbackId):  # pylint: disable=invalid-name
  """Open the bounding box UI and send the results to a callback function.

  The images are Base64-encoded, the annotation widget's Javascript is
  displayed in the cell output, and `load_image` is then invoked in the
  frontend with the encoded images and the callback ID. When the user presses
  "submit", the frontend calls the registered kernel function `callbackId`
  with one list of `{x, y, w, h}` boxes per image; the box values are divided
  by the displayed image width/height before being stored.

  Args:
    image_urls: list[np.ndarray]
      Images to annotate. Every entry must be an np.ndarray, which is
      interpreted as an image and sent to the frontend. Other types (including
      str paths) are not supported by this function.
    callbackId: str
      The ID for the callback function to send the bounding box results to
      when the user hits submit.

  Raises:
    TypeError: if any entry of `image_urls` is not an np.ndarray.
  """
  js = Javascript('''
async function load_image(imgs, callbackId) {
//init organizational elements
const div = document.createElement('div');
var image_cont = document.createElement('div');
var errorlog = document.createElement('div');
var crosshair_h = document.createElement('div');
crosshair_h.style.position = "absolute";
crosshair_h.style.backgroundColor = "transparent";
crosshair_h.style.width = "100%";
crosshair_h.style.height = "0px";
crosshair_h.style.zIndex = 9998;
crosshair_h.style.borderStyle = "dotted";
crosshair_h.style.borderWidth = "2px";
crosshair_h.style.borderColor = "rgba(255, 0, 0, 0.75)";
crosshair_h.style.cursor = "crosshair";
var crosshair_v = document.createElement('div');
crosshair_v.style.position = "absolute";
crosshair_v.style.backgroundColor = "transparent";
crosshair_v.style.width = "0px";
crosshair_v.style.height = "100%";
crosshair_v.style.zIndex = 9999;
crosshair_v.style.top = "0px";
crosshair_v.style.borderStyle = "dotted";
crosshair_v.style.borderWidth = "2px";
crosshair_v.style.borderColor = "rgba(255, 0, 0, 0.75)";
crosshair_v.style.cursor = "crosshair";
crosshair_v.style.marginTop = "23px";
var brdiv = document.createElement('br');
//init control elements
var next = document.createElement('button');
var prev = document.createElement('button');
var submit = document.createElement('button');
var deleteButton = document.createElement('button');
var deleteAllbutton = document.createElement('button');
//init image containers
var image = new Image();
var canvas_img = document.createElement('canvas');
var ctx = canvas_img.getContext("2d");
canvas_img.style.cursor = "crosshair";
canvas_img.setAttribute('draggable', false);
crosshair_v.setAttribute('draggable', false);
crosshair_h.setAttribute('draggable', false);
// bounding box containers
const height = 600
var allBoundingBoxes = [];
var curr_image = 0
var im_height = 0;
var im_width = 0;
//initialize bounding boxes
for (var i = 0; i < imgs.length; i++) {
allBoundingBoxes[i] = [];
}
//initialize image view
errorlog.id = 'errorlog';
image.style.display = 'block';
image.setAttribute('draggable', false);
//load the first image
img = imgs[curr_image];
image.src = "data:image/png;base64," + img;
image.onload = function() {
// normalize display height and canvas
image.height = height;
image_cont.height = canvas_img.height = image.height;
image_cont.width = canvas_img.width = image.naturalWidth;
crosshair_v.style.height = image_cont.height + "px";
crosshair_h.style.width = image_cont.width + "px";
// draw the new image
ctx.drawImage(image, 0, 0, image.naturalWidth, image.naturalHeight, 0, 0, canvas_img.width, canvas_img.height);
};
// move to next image in array
next.textContent = "next image";
next.onclick = function(){
if (curr_image < imgs.length - 1){
// clear canvas and load new image
curr_image += 1;
errorlog.innerHTML = "";
}
else{
errorlog.innerHTML = "All images completed!!";
}
resetcanvas();
}
//move forward through list of images
prev.textContent = "prev image"
prev.onclick = function(){
if (curr_image > 0){
// clear canvas and load new image
curr_image -= 1;
errorlog.innerHTML = "";
}
else{
errorlog.innerHTML = "at the beginning";
}
resetcanvas();
}
// on delete, deletes the last bounding box
deleteButton.textContent = "undo bbox";
deleteButton.onclick = function(){
boundingBoxes.pop();
ctx.clearRect(0, 0, canvas_img.width, canvas_img.height);
image.src = "data:image/png;base64," + img;
image.onload = function() {
ctx.drawImage(image, 0, 0, image.naturalWidth, image.naturalHeight, 0, 0, canvas_img.width, canvas_img.height);
boundingBoxes.map(r => {drawRect(r)});
};
}
// on all delete, deletes all of the bounding box
deleteAllbutton.textContent = "delete all"
deleteAllbutton.onclick = function(){
boundingBoxes = [];
ctx.clearRect(0, 0, canvas_img.width, canvas_img.height);
image.src = "data:image/png;base64," + img;
image.onload = function() {
ctx.drawImage(image, 0, 0, image.naturalWidth, image.naturalHeight, 0, 0, canvas_img.width, canvas_img.height);
//boundingBoxes.map(r => {drawRect(r)});
};
}
// on submit, send the boxes to display
submit.textContent = "submit";
submit.onclick = function(){
errorlog.innerHTML = "";
// send box data to callback fucntion
google.colab.kernel.invokeFunction(callbackId, [allBoundingBoxes], {});
}
// init template for annotations
const annotation = {
x: 0,
y: 0,
w: 0,
h: 0,
};
// the array of all rectangles
let boundingBoxes = allBoundingBoxes[curr_image];
// the actual rectangle, the one that is being drawn
let o = {};
// a variable to store the mouse position
let m = {},
// a variable to store the point where you begin to draw the
// rectangle
start = {};
// a boolean variable to store the drawing state
let isDrawing = false;
var elem = null;
function handleMouseDown(e) {
// on mouse click set change the cursor and start tracking the mouse position
start = oMousePos(canvas_img, e);
// configure is drawing to true
isDrawing = true;
}
function handleMouseMove(e) {
// move crosshairs, but only within the bounds of the canvas
if (document.elementsFromPoint(e.pageX, e.pageY).includes(canvas_img)) {
crosshair_h.style.top = e.pageY + "px";
crosshair_v.style.left = e.pageX + "px";
}
// move the bounding box
if(isDrawing){
m = oMousePos(canvas_img, e);
draw();
}
}
function handleMouseUp(e) {
if (isDrawing) {
// on mouse release, push a bounding box to array and draw all boxes
isDrawing = false;
const box = Object.create(annotation);
// calculate the position of the rectangle
if (o.w > 0){
box.x = o.x;
}
else{
box.x = o.x + o.w;
}
if (o.h > 0){
box.y = o.y;
}
else{
box.y = o.y + o.h;
}
box.w = Math.abs(o.w);
box.h = Math.abs(o.h);
// add the bounding box to the image
boundingBoxes.push(box);
draw();
}
}
function draw() {
o.x = (start.x)/image.width; // start position of x
o.y = (start.y)/image.height; // start position of y
o.w = (m.x - start.x)/image.width; // width
o.h = (m.y - start.y)/image.height; // height
ctx.clearRect(0, 0, canvas_img.width, canvas_img.height);
ctx.drawImage(image, 0, 0, image.naturalWidth, image.naturalHeight, 0, 0, canvas_img.width, canvas_img.height);
// draw all the rectangles saved in the rectsRy
boundingBoxes.map(r => {drawRect(r)});
// draw the actual rectangle
drawRect(o);
}
// add the handlers needed for dragging
crosshair_h.addEventListener("mousedown", handleMouseDown);
crosshair_v.addEventListener("mousedown", handleMouseDown);
document.addEventListener("mousemove", handleMouseMove);
document.addEventListener("mouseup", handleMouseUp);
function resetcanvas(){
// clear canvas
ctx.clearRect(0, 0, canvas_img.width, canvas_img.height);
img = imgs[curr_image]
image.src = "data:image/png;base64," + img;
// onload init new canvas and display image
image.onload = function() {
// normalize display height and canvas
image.height = height;
image_cont.height = canvas_img.height = image.height;
image_cont.width = canvas_img.width = image.naturalWidth;
crosshair_v.style.height = image_cont.height + "px";
crosshair_h.style.width = image_cont.width + "px";
// draw the new image
ctx.drawImage(image, 0, 0, image.naturalWidth, image.naturalHeight, 0, 0, canvas_img.width, canvas_img.height);
// draw bounding boxes
boundingBoxes = allBoundingBoxes[curr_image];
boundingBoxes.map(r => {drawRect(r)});
};
}
function drawRect(o){
// draw a predefined rectangle
ctx.strokeStyle = "red";
ctx.lineWidth = 2;
ctx.beginPath(o);
ctx.rect(o.x * image.width, o.y * image.height, o.w * image.width, o.h * image.height);
ctx.stroke();
}
// Function to detect the mouse position
function oMousePos(canvas_img, evt) {
let ClientRect = canvas_img.getBoundingClientRect();
return {
x: evt.clientX - ClientRect.left,
y: evt.clientY - ClientRect.top
};
}
//configure colab output display
google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);
//build the html document that will be seen in output
div.appendChild(document.createElement('br'))
div.appendChild(image_cont)
image_cont.appendChild(canvas_img)
image_cont.appendChild(crosshair_h)
image_cont.appendChild(crosshair_v)
div.appendChild(document.createElement('br'))
div.appendChild(errorlog)
div.appendChild(prev)
div.appendChild(next)
div.appendChild(deleteButton)
div.appendChild(deleteAllbutton)
div.appendChild(document.createElement('br'))
div.appendChild(brdiv)
div.appendChild(submit)
document.querySelector("#output-area").appendChild(div);
return
}''')
  # Encode every image as a Base64 string so it can be embedded in a data URL
  # by the frontend. Only numpy arrays are supported.
  bytearrays = []
  for image in image_urls:
    if isinstance(image, np.ndarray):
      bytearrays.append(image_from_numpy(image))
    else:
      raise TypeError('Image has unsupported type {}.'.format(type(image)))
  # Serialise the encoded images as a JSON array literal for the JS call.
  image_data = json.dumps(bytearrays)
  del bytearrays  # Free the intermediate list; the JSON string is all we need.
  # Define load_image in the frontend, then call it. The callback ID is
  # JSON-encoded as well so that quotes or backslashes in it cannot break out
  # of the Javascript string literal.
  display(js)
  eval_js('load_image({}, {})'.format(image_data,
                                      json.dumps(str(callbackId))))
def annotate(imgs: List[Union[str, np.ndarray]],  # pylint: disable=invalid-name
             box_storage_pointer: List[np.ndarray],
             callbackId: str = None):
  """Open the bounding box UI and prompt the user for input.

  Registers a kernel callback that converts the boxes submitted by the
  frontend into numpy arrays, then opens the annotation widget via
  `draw_bbox`.

  Args:
    imgs: list[str | np.ndarray]
      Images to annotate; the list is forwarded unchanged to `draw_bbox`. A
      np.ndarray is interpreted as an image and sent to the frontend.
      NOTE(review): `draw_bbox` currently raises TypeError for entries that
      are not np.ndarray, so str paths are not actually loaded.
    box_storage_pointer: list[np.ndarray]
      Destination list for bounding box arrays. It is cleared and refilled in
      place every time the user submits. Each entry corresponds to one of the
      images given in imgs and is a N x 4 array of
      [ymin, xmin, ymax, xmax] rows clipped to [0, 1], where N is the number
      of bounding boxes given by the user for that particular image. If there
      are no bounding boxes for an image, None is used instead of an empty
      array.
    callbackId: str, optional
      The ID for the callback function that communicates between the frontend
      and the backend. If no ID is given, a random UUID string is used instead.
  """
  # Set a random ID for the callback function. uuid4 is genuinely random,
  # whereas uuid1 is derived from the host's network address and the clock.
  if callbackId is None:
    callbackId = uuid.uuid4().hex

  def dictToList(input_bbox):  # pylint: disable=invalid-name
    """Convert a frontend bbox dictionary into a coordinate list.

    Args:
      input_bbox: dict with keys 'x', 'y', 'w' and 'h' as sent by the
        frontend (top-left corner plus width and height).

    Returns:
      A list with bbox coordinates in the form [ymin, xmin, ymax, xmax].
    """
    y_min = input_bbox['y']
    x_min = input_bbox['x']
    return [y_min, x_min, y_min + input_bbox['h'], x_min + input_bbox['w']]

  def callbackFunction(annotations: List[List[Dict[str, float]]]):  # pylint: disable=invalid-name
    """Capture the data from the frontend and convert it to numpy arrays.

    Args:
      annotations: list[list[dict[str, float]]]
        A list (one entry per image) of lists (one entry per rectangle) of
        objects of the form {x, y, w, h}:
          [
            // image 1
            [{x, y, w, h}, {x, y, w, h}, ...],
            // image 2
            [{x, y, w, h}, {x, y, w, h}, ...],
            ...
          ]
    """
    # Reset the caller's list in place so the caller's reference stays valid.
    boxes = box_storage_pointer
    boxes.clear()

    # Load the new annotations into the boxes list.
    for annotations_per_img in annotations:
      rectangles_as_arrays = [np.clip(dictToList(annotation), 0, 1)
                              for annotation in annotations_per_img]
      if rectangles_as_arrays:
        boxes.append(np.stack(rectangles_as_arrays))
      else:
        boxes.append(None)

    # Report completion in the widget's errorlog element.
    with output.redirect_to_element('#errorlog'):
      display('--boxes array populated--')

  output.register_callback(callbackId, callbackFunction)
  draw_bbox(imgs, callbackId)
......@@ -147,6 +147,7 @@ def clear_fine_tune_checkpoint(pipeline_config_path,
"""Clears fine_tune_checkpoint and writes a new pipeline config file."""
configs = get_configs_from_pipeline_file(pipeline_config_path)
configs["train_config"].fine_tune_checkpoint = ""
configs["train_config"].load_all_detection_checkpoint_vars = False
pipeline_proto = create_pipeline_proto_from_configs(configs)
with tf.gfile.Open(new_pipeline_config_path, "wb") as f:
f.write(text_format.MessageToString(pipeline_proto))
......
......@@ -54,8 +54,8 @@ def extract_submodel(model, inputs, outputs, name=None):
for layer in model.layers:
layer_output = layer.output
layer_inputs = layer.input
output_to_layer[layer_output.ref()] = layer
output_to_layer_input[layer_output.ref()] = layer_inputs
output_to_layer[layer_output.experimental_ref()] = layer
output_to_layer_input[layer_output.experimental_ref()] = layer_inputs
model_inputs_dict = {}
memoized_results = {}
......@@ -63,21 +63,22 @@ def extract_submodel(model, inputs, outputs, name=None):
# Relies on recursion, very low limit in python
def _recurse_in_model(tensor):
"""Walk the existing model recursively to copy a submodel."""
if tensor.ref() in memoized_results:
return memoized_results[tensor.ref()]
if (tensor.ref() == inputs.ref()) or (
if tensor.experimental_ref() in memoized_results:
return memoized_results[tensor.experimental_ref()]
if (tensor.experimental_ref() == inputs.experimental_ref()) or (
isinstance(inputs, list) and tensor in inputs):
if tensor.ref() not in model_inputs_dict:
model_inputs_dict[tensor.ref()] = tf.keras.layers.Input(tensor=tensor)
out = model_inputs_dict[tensor.ref()]
if tensor.experimental_ref() not in model_inputs_dict:
model_inputs_dict[tensor.experimental_ref()] = tf.keras.layers.Input(
tensor=tensor)
out = model_inputs_dict[tensor.experimental_ref()]
else:
cur_inputs = output_to_layer_input[tensor.ref()]
cur_layer = output_to_layer[tensor.ref()]
cur_inputs = output_to_layer_input[tensor.experimental_ref()]
cur_layer = output_to_layer[tensor.experimental_ref()]
if isinstance(cur_inputs, list):
out = cur_layer([_recurse_in_model(inp) for inp in cur_inputs])
else:
out = cur_layer(_recurse_in_model(cur_inputs))
memoized_results[tensor.ref()] = out
memoized_results[tensor.experimental_ref()] = out
return out
if isinstance(outputs, list):
......@@ -86,8 +87,10 @@ def extract_submodel(model, inputs, outputs, name=None):
model_outputs = _recurse_in_model(outputs)
if isinstance(inputs, list):
model_inputs = [model_inputs_dict[tensor.ref()] for tensor in inputs]
model_inputs = [
model_inputs_dict[tensor.experimental_ref()] for tensor in inputs
]
else:
model_inputs = model_inputs_dict[inputs.ref()]
model_inputs = model_inputs_dict[inputs.experimental_ref()]
return tf.keras.Model(inputs=model_inputs, outputs=model_outputs, name=name)
......@@ -799,14 +799,14 @@ def position_sensitive_crop_regions(image,
def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
image_width):
image_width, resize_method='bilinear'):
"""Transforms the box masks back to full image masks.
Embeds masks in bounding boxes of larger masks whose shapes correspond to
image shape.
Args:
box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
box_masks: A tensor of size [num_masks, mask_height, mask_width].
boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
corners. Row i contains [ymin, xmin, ymax, xmax] of the box
corresponding to mask i. Note that the box corners are in
......@@ -815,10 +815,14 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
the image height.
image_width: Image width. The output mask will have the same width as the
image width.
resize_method: The resize method, either 'bilinear' or 'nearest'. Note that
'bilinear' is only respected if box_masks is a float.
Returns:
A tf.float32 tensor of size [num_masks, image_height, image_width].
A tensor of size [num_masks, image_height, image_width] with the same dtype
as `box_masks`.
"""
resize_method = 'nearest' if box_masks.dtype == tf.uint8 else resize_method
# TODO(rathodv): Make this a public function.
def reframe_box_masks_to_image_masks_default():
"""The default function when there are more than 0 box masks."""
......@@ -840,16 +844,19 @@ def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
# TODO(vighneshb) Use matmul_crop_and_resize so that the output shape
# is static. This will help us run and test on TPUs.
return tf.image.crop_and_resize(
resized_crops = tf.image.crop_and_resize(
image=box_masks_expanded,
boxes=reverse_boxes,
box_ind=tf.range(num_boxes),
crop_size=[image_height, image_width],
extrapolation_value=0.0)
method=resize_method,
extrapolation_value=0)
return tf.cast(resized_crops, box_masks.dtype)
image_masks = tf.cond(
tf.shape(box_masks)[0] > 0,
reframe_box_masks_to_image_masks_default,
lambda: tf.zeros([0, image_height, image_width, 1], dtype=tf.float32))
lambda: tf.zeros([0, image_height, image_width, 1], box_masks.dtype))
return tf.squeeze(image_masks, axis=3)
......
......@@ -18,6 +18,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from absl.testing import parameterized
import numpy as np
import six
from six.moves import range
......@@ -1082,7 +1084,7 @@ class OpsTestPositionSensitiveCropRegions(test_case.TestCase):
return ps_crop_and_pool
output = self.execute(graph_fn, [])
self.assertAllEqual(output, expected_output[crop_size_mult - 1])
self.assertAllClose(output, expected_output[crop_size_mult - 1])
def test_raise_value_error_on_non_square_block_size(self):
num_spatial_bins = [3, 2]
......@@ -1190,36 +1192,59 @@ class OpsTestBatchPositionSensitiveCropRegions(test_case.TestCase):
# The following tests are only executed on CPU because the output
# shape is not constant.
class ReframeBoxMasksToImageMasksTest(test_case.TestCase):
def testZeroImageOnEmptyMask(self):
class ReframeBoxMasksToImageMasksTest(test_case.TestCase,
parameterized.TestCase):
@parameterized.parameters(
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'bilinear'},
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'nearest'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'bilinear'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'nearest'},
)
def testZeroImageOnEmptyMask(self, mask_dtype, mask_dtype_np, resize_method):
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]], dtype=np.float32)
[0, 0, 0, 0]]])
def graph_fn():
box_masks = tf.constant([[[0, 0],
[0, 0]]], dtype=tf.float32)
[0, 0]]], dtype=mask_dtype)
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
image_masks = ops.reframe_box_masks_to_image_masks(
box_masks, boxes, image_height=4, image_width=4,
resize_method=resize_method)
return image_masks
np_image_masks = self.execute_cpu(graph_fn, [])
self.assertEqual(np_image_masks.dtype, mask_dtype_np)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testZeroBoxMasks(self):
@parameterized.parameters(
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'bilinear'},
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'nearest'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'bilinear'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'nearest'},
)
def testZeroBoxMasks(self, mask_dtype, mask_dtype_np, resize_method):
def graph_fn():
box_masks = tf.zeros([0, 3, 3], dtype=tf.float32)
box_masks = tf.zeros([0, 3, 3], dtype=mask_dtype)
boxes = tf.zeros([0, 4], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
image_masks = ops.reframe_box_masks_to_image_masks(
box_masks, boxes, image_height=4, image_width=4,
resize_method=resize_method)
return image_masks
np_image_masks = self.execute_cpu(graph_fn, [])
self.assertEqual(np_image_masks.dtype, mask_dtype_np)
self.assertAllEqual(np_image_masks.shape, np.array([0, 4, 4]))
def testBoxWithZeroArea(self):
......@@ -1235,40 +1260,70 @@ class ReframeBoxMasksToImageMasksTest(test_case.TestCase):
np_image_masks = self.execute_cpu(graph_fn, [])
self.assertAllEqual(np_image_masks.shape, np.array([1, 4, 4]))
def testMaskIsCenteredInImageWhenBoxIsCentered(self):
@parameterized.parameters(
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'bilinear'},
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'nearest'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'bilinear'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'nearest'},
)
def testMaskIsCenteredInImageWhenBoxIsCentered(self, mask_dtype,
mask_dtype_np, resize_method):
def graph_fn():
box_masks = tf.constant([[[1, 1],
[1, 1]]], dtype=tf.float32)
box_masks = tf.constant([[[4, 4],
[4, 4]]], dtype=mask_dtype)
boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
image_masks = ops.reframe_box_masks_to_image_masks(
box_masks, boxes, image_height=4, image_width=4,
resize_method=resize_method)
return image_masks
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]]], dtype=np.float32)
[0, 4, 4, 0],
[0, 4, 4, 0],
[0, 0, 0, 0]]], dtype=mask_dtype_np)
np_image_masks = self.execute_cpu(graph_fn, [])
self.assertEqual(np_image_masks.dtype, mask_dtype_np)
self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskOffCenterRemainsOffCenterInImage(self):
@parameterized.parameters(
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'bilinear'},
{'mask_dtype': tf.float32, 'mask_dtype_np': np.float32,
'resize_method': 'nearest'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'bilinear'},
{'mask_dtype': tf.uint8, 'mask_dtype_np': np.uint8,
'resize_method': 'nearest'},
)
def testMaskOffCenterRemainsOffCenterInImage(self, mask_dtype,
mask_dtype_np, resize_method):
def graph_fn():
box_masks = tf.constant([[[1, 0],
[0, 1]]], dtype=tf.float32)
[0, 1]]], dtype=mask_dtype)
boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
image_height=4,
image_width=4)
image_masks = ops.reframe_box_masks_to_image_masks(
box_masks, boxes, image_height=4, image_width=4,
resize_method=resize_method)
return image_masks
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0.6111111, 0.16666669],
[0, 0, 0.3888889, 0.83333337],
[0, 0, 0, 0]]], dtype=np.float32)
if mask_dtype == tf.float32 and resize_method == 'bilinear':
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 0.6111111, 0.16666669],
[0, 0, 0.3888889, 0.83333337],
[0, 0, 0, 0]]], dtype=np.float32)
else:
np_expected_image_masks = np.array([[[0, 0, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1],
[0, 0, 0, 0]]], dtype=mask_dtype_np)
np_image_masks = self.execute_cpu(graph_fn, [])
self.assertEqual(np_image_masks.dtype, mask_dtype_np)
self.assertAllClose(np_image_masks, np_expected_image_masks)
......
......@@ -411,6 +411,56 @@ def multilevel_roi_align(features, boxes, box_levels, output_size,
return features_per_box
def multilevel_native_crop_and_resize(images, boxes, box_levels,
                                      crop_size, scope=None):
  """Crops boxes from multi-level features with tf.image.crop_and_resize.

  Counterpart of `multilevel_matmul_crop_and_resize` that delegates the
  per-level cropping to `native_crop_and_resize`.

  Args:
    images: A list of 4-D tensor of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32`.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      y1 > y2 is allowed, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case `extrapolation_value` is used to extrapolate the input image values.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box. If None, all boxes are cropped from `images[0]`.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    scope: A name for the operation (optional). It is forwarded to
      `native_crop_and_resize`.

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  if box_levels is None:
    return native_crop_and_resize(images[0], boxes, crop_size, scope)
  with tf.name_scope('MultiLevelNativeCropAndResize'):
    per_level_crops = []
    for level_index, feature_map in enumerate(images):
      # Every box is cropped from every level; crops whose box does not belong
      # to this level are zeroed so that the final sum keeps exactly one
      # level's crop per box. A cheaper formulation may be possible.
      crops = native_crop_and_resize(feature_map, boxes, crop_size, scope)
      belongs_here = tf.equal(box_levels, level_index)[:, :, tf.newaxis]
      elements_per_crop = tf.math.reduce_prod(crops.shape.as_list()[2:])
      # Expand the per-box flag to one flag per crop element.
      level_mask = tf.tile(belongs_here, [1, 1] + [elements_per_crop])
      level_mask = tf.reshape(level_mask, crops.shape)
      per_level_crops.append(
          tf.where(level_mask, crops, tf.zeros_like(crops)))
    return tf.math.reduce_sum(per_level_crops, axis=0)
def native_crop_and_resize(image, boxes, crop_size, scope=None):
"""Same as `matmul_crop_and_resize` but uses tf.image.crop_and_resize."""
def get_box_inds(proposals):
......@@ -431,6 +481,50 @@ def native_crop_and_resize(image, boxes, crop_size, scope=None):
return tf.reshape(cropped_regions, final_shape)
def multilevel_matmul_crop_and_resize(images, boxes, box_levels, crop_size,
                                      extrapolation_value=0.0, scope=None):
  """Crops boxes from multi-level features via `multilevel_roi_align`.

  Like `matmul_crop_and_resize`, except that each box is cropped from the
  feature level given by `box_levels`.

  Args:
    images: A list of 4-D tensor of shape
      [batch, image_height, image_width, depth] representing features of
      different size.
    boxes: A `Tensor` of type `float32` or 'bfloat16'.
      A 3-D tensor of shape `[batch, num_boxes, 4]`. The boxes are specified in
      normalized coordinates and are of the form `[y1, x1, y2, x2]`. A
      normalized coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height coordinates.
      y1 > y2 is allowed, in which case the sampled crop is an up-down flipped
      version of the original image. The width dimension is treated similarly.
      Normalized coordinates outside the `[0, 1]` range are allowed, in which
      case `extrapolation_value` is used to extrapolate the input image values.
    box_levels: A 2-D tensor of shape [batch, num_boxes] representing the level
      of the box. If None, every box is assigned to level 0.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    extrapolation_value: A float value to use for extrapolation.
    scope: A name for the operation (optional).

  Returns:
    A 5-D float tensor of shape `[batch, num_boxes, crop_height, crop_width,
    depth]`
  """
  with tf.name_scope(scope, 'MultiLevelMatMulCropAndResize'):
    levels = box_levels
    if levels is None:
      # No level assignment given: treat every box as belonging to level 0.
      batch_and_num_boxes = tf.shape(boxes)[:2]
      levels = tf.zeros(batch_and_num_boxes, dtype=tf.int32)
    cropped = multilevel_roi_align(
        images, boxes, levels, crop_size,
        align_corners=True,
        extrapolation_value=extrapolation_value)
    return cropped
def matmul_crop_and_resize(image, boxes, crop_size, extrapolation_value=0.0,
scope=None):
"""Matrix multiplication based implementation of the crop and resize op.
......
......@@ -512,6 +512,38 @@ class MatMulCropAndResizeTest(test_case.TestCase):
crop_output = self.execute(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMultilevelMatMulCropAndResize(self):
  """Checks 2x2 multilevel matmul crops against hand-written expectations."""

  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_matmul_crop_and_resize(
        [image1, image2], boxes, box_levels, crop_size=[2, 2])

  # Level 0: 3x3 image whose second channel is all zeros.
  level0_image = [[[1, 0], [2, 0], [3, 0]],
                  [[4, 0], [5, 0], [6, 0]],
                  [[7, 0], [8, 0], [9, 0]]]
  # Level 1: 3x3 image whose second channel equals the first channel minus 1.
  level1_image = [[[1, 0], [2, 1], [3, 2]],
                  [[4, 3], [5, 4], [6, 5]],
                  [[7, 6], [8, 7], [9, 8]]]
  # Both batch elements hold the same image at each level.
  feature_maps = [
      np.array([level0_image, level0_image], dtype=np.float32),
      np.array([level1_image, level1_image], dtype=np.float32),
  ]

  # The boxes of the first batch element have y1 > y2 and x1 > x2.
  boxes = np.array(
      [[[1, 1, 0, 0], [.5, .5, 0, 0]],
       [[0, 0, 1, 1], [0, 0, .5, .5]]],
      dtype=np.float32)
  # Only the first box of the first batch element reads from level 0.
  box_levels = np.array([[0, 1], [1, 1]], dtype=np.int32)

  expected_output = [
      [[[[9, 0], [7, 0]], [[3, 0], [1, 0]]],
       [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]],
      [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
       [[[1, 0], [2, 1]], [[4, 3], [5, 4]]]],
  ]

  graph_inputs = feature_maps + [boxes, box_levels]
  crop_output = self.execute(graph_fn, graph_inputs)
  self.assertAllClose(crop_output, expected_output)
class NativeCropAndResizeTest(test_case.TestCase):
......@@ -537,6 +569,35 @@ class NativeCropAndResizeTest(test_case.TestCase):
crop_output = self.execute_cpu(graph_fn, [image, boxes])
self.assertAllClose(crop_output, expected_output)
def testMultilevelBatchCropAndResize3x3To2x2_2Channels(self):
  """Checks 2x2 multilevel native crops from a 3x3 and a 2x2 feature level."""

  def graph_fn(image1, image2, boxes, box_levels):
    return spatial_ops.multilevel_native_crop_and_resize(
        [image1, image2], boxes, box_levels, crop_size=[2, 2])

  # Level 0: 3x3 image with two channels.
  level0_image = [[[1, 0], [2, 1], [3, 2]],
                  [[4, 3], [5, 4], [6, 5]],
                  [[7, 6], [8, 7], [9, 8]]]
  # Level 1: 2x2 image with two channels.
  level1_image = [[[1, 0], [2, 1]],
                  [[4, 3], [5, 4]]]
  # Both batch elements hold the same image at each level.
  feature_maps = [
      np.array([level0_image, level0_image], dtype=np.float32),
      np.array([level1_image, level1_image], dtype=np.float32),
  ]

  # The boxes of the second batch element have y1 > y2 and x1 > x2.
  boxes = np.array(
      [[[0, 0, 1, 1], [0, 0, .5, .5]],
       [[1, 1, 0, 0], [.5, .5, 0, 0]]],
      dtype=np.float32)
  # Only the second box of the first batch element reads from level 1.
  box_levels = np.array([[0, 1], [0, 0]], dtype=np.float32)

  expected_output = [
      [[[[1, 0], [3, 2]], [[7, 6], [9, 8]]],
       [[[1, 0], [1.5, 0.5]], [[2.5, 1.5], [3, 2]]]],
      [[[[9, 8], [7, 6]], [[3, 2], [1, 0]]],
       [[[5, 4], [4, 3]], [[2, 1], [1, 0]]]],
  ]

  graph_inputs = feature_maps + [boxes, box_levels]
  crop_output = self.execute_cpu(graph_fn, graph_inputs)
  self.assertAllClose(crop_output, expected_output)
# Script entry point: when this file is executed directly (not imported),
# hand control to `tf.test.main()`.
if __name__ == '__main__':
tf.test.main()
......@@ -790,6 +790,81 @@ def draw_side_by_side_evaluation_image(eval_dict,
return images_with_detections_list
def draw_densepose_visualizations(eval_dict,
                                  max_boxes_to_draw=20,
                                  min_score_thresh=0.2,
                                  num_parts=24,
                                  dp_coord_to_visualize=0):
  """Draws DensePose visualizations.

  For every image in the batch, two overlays are produced and concatenated
  along axis 1: the part masks drawn on the image, and the selected surface
  coordinate channel drawn on a copy of the image.

  Args:
    eval_dict: The evaluation dictionary returned by
      eval_util.result_dict_for_batched_example().
    max_boxes_to_draw: The maximum number of boxes to draw for detections.
    min_score_thresh: The minimum score threshold for showing detections.
    num_parts: The number of different densepose parts.
    dp_coord_to_visualize: Whether to visualize v-coordinates (0) or
      u-coordinates (1) overlaid on the person masks.

  Returns:
    A list of [1, H, W, C] uint8 tensor, each element corresponding to an image
    in the batch.

  Raises:
    ValueError: If `dp_coord_to_visualize` is not 0 or 1, or if `eval_dict`
      does not contain `detection_masks` or `detection_surface_coords`.
  """
  if dp_coord_to_visualize not in (0, 1):
    raise ValueError('`dp_coord_to_visualize` must be either 0 for v '
                     'coordinates, or 1 for u coordinates, but instead got '
                     '{}'.format(dp_coord_to_visualize))
  detection_fields = fields.DetectionResultFields()
  input_data_fields = fields.InputDataFields()

  if detection_fields.detection_masks not in eval_dict:
    raise ValueError('Expected `detection_masks` in `eval_dict`.')
  if detection_fields.detection_surface_coords not in eval_dict:
    raise ValueError('Expected `detection_surface_coords` in `eval_dict`.')

  # The drawing routine only depends on the function arguments, so it is
  # defined once here instead of being rebuilt for every image in the batch.
  def draw_densepose_py_func(image, detection_masks, surface_coords, scores):
    """Overlays part masks and surface coords on original images."""
    # Part masks are drawn on `image` in place; the coordinate channel is
    # drawn on a separate copy so the two overlays do not mix.
    surface_coord_image = np.copy(image)
    for i, (score, surface_coord, mask) in enumerate(
        zip(scores, surface_coords, detection_masks)):
      # The limit counts every detection visited, including those that fall
      # below the score threshold.
      if i == max_boxes_to_draw:
        break
      if score > min_score_thresh:
        draw_part_mask_on_image_array(image, mask, num_parts=num_parts)
        draw_float_channel_on_image_array(
            surface_coord_image, surface_coord[:, :, dp_coord_to_visualize],
            mask)
    return np.concatenate([image, surface_coord_image], axis=1)

  images_with_detections_list = []
  for indx in range(eval_dict[input_data_fields.original_image].shape[0]):
    # Note that detection masks have already been resized to the original image
    # shapes, but `original_image` has not.
    # TODO(ronnyvotel): Consider resizing `original_image` in
    # eval_util.result_dict_for_batched_example().
    true_shape = eval_dict[input_data_fields.true_image_shape][indx]
    original_shape = eval_dict[
        input_data_fields.original_image_spatial_shape][indx]
    image = eval_dict[input_data_fields.original_image][indx]
    image = shape_utils.pad_or_clip_nd(image, [true_shape[0], true_shape[1], 3])
    image = _resize_original_image(image, original_shape)

    scores = eval_dict[detection_fields.detection_scores][indx]
    detection_masks = eval_dict[detection_fields.detection_masks][indx]
    surface_coords = eval_dict[detection_fields.detection_surface_coords][indx]

    image_with_densepose = tf.py_func(
        draw_densepose_py_func,
        [image, detection_masks, surface_coords, scores],
        tf.uint8)
    # Add a leading batch dimension of size 1.
    images_with_detections_list.append(
        image_with_densepose[tf.newaxis, :, :, :])
  return images_with_detections_list
def draw_keypoints_on_image_array(image,
keypoints,
keypoint_scores=None,
......@@ -918,8 +993,6 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
raise ValueError('`image` not of type np.uint8')
if mask.dtype != np.uint8:
raise ValueError('`mask` not of type np.uint8')
if np.any(np.logical_and(mask != 1, mask != 0)):
raise ValueError('`mask` elements should be in [0, 1]')
if image.shape[:2] != mask.shape:
raise ValueError('The image has spatial dimensions %s but the mask has '
'dimensions %s' % (image.shape[:2], mask.shape))
......@@ -929,11 +1002,85 @@ def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
solid_color = np.expand_dims(
np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
pil_mask = Image.fromarray(np.uint8(255.0*alpha*mask)).convert('L')
pil_mask = Image.fromarray(np.uint8(255.0*alpha*(mask > 0))).convert('L')
pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
np.copyto(image, np.array(pil_image.convert('RGB')))
def draw_part_mask_on_image_array(image, mask, alpha=0.4, num_parts=24):
  """Overlays a color-coded part mask on an image, modifying it in place.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    mask: a uint8 numpy array of shape (img_height, img_width) with
      1-indexed parts (0 for background).
    alpha: transparency value between 0 and 1 (default: 0.4)
    num_parts: the maximum number of parts that may exist in the image (default
      24 for DensePose).

  Raises:
    ValueError: On incorrect data type for image or masks, or when the spatial
      dimensions of the image and the mask differ.
  """
  if image.dtype != np.uint8:
    raise ValueError('`image` not of type np.uint8')
  if mask.dtype != np.uint8:
    raise ValueError('`mask` not of type np.uint8')
  spatial_dims = image.shape[:2]
  if spatial_dims != mask.shape:
    raise ValueError('The image has spatial dimensions %s but the mask has '
                     'dimensions %s' % (spatial_dims, mask.shape))

  base_image = Image.fromarray(image)

  # Build an RGB image in which every pixel carries the color of its part id;
  # pixels whose id matches no entry keep the initial zeros.
  color_overlay = np.zeros_like(image)
  part_ids = mask[:, :, np.newaxis]
  for part_id, color_name in enumerate(STANDARD_COLORS[:num_parts], start=1):
    part_rgb = np.array(ImageColor.getrgb(color_name), dtype=np.uint8)
    color_overlay += (part_ids == part_id) * part_rgb[np.newaxis, np.newaxis, :]

  overlay_rgba = Image.fromarray(np.uint8(color_overlay)).convert('RGBA')
  # Every non-background pixel is blended with the same opacity.
  opacity = np.uint8(255.0 * alpha * (mask > 0))
  blend_mask = Image.fromarray(opacity).convert('L')
  blended = Image.composite(overlay_rgba, base_image, blend_mask)
  np.copyto(image, np.array(blended.convert('RGB')))
def draw_float_channel_on_image_array(image, channel, mask, alpha=0.9,
                                      cmap='YlGn'):
  """Overlays a color-mapped float channel on an image, modifying it in place.

  Args:
    image: uint8 numpy array with shape (img_height, img_width, 3)
    channel: float32 numpy array with shape (img_height, img_width). The values
      should be in the range [0, 1], and will be mapped to colors using the
      provided colormap `cmap` argument.
    mask: a uint8 numpy array of shape (img_height, img_width) with
      1-indexed parts (0 for background). Only pixels with a nonzero mask value
      receive the overlay.
    alpha: transparency value between 0 and 1 (default: 0.9)
    cmap: string with the colormap to use.

  Raises:
    ValueError: On incorrect data type for image, channel or mask, or when the
      spatial dimensions of the channel or the mask differ from the image.
  """
  # Validate dtypes first, in the order image, channel, mask.
  dtype_requirements = (
      (image, np.uint8, '`image` not of type np.uint8'),
      (channel, np.float32, '`channel` not of type np.float32'),
      (mask, np.uint8, '`mask` not of type np.uint8'),
  )
  for array, required_dtype, message in dtype_requirements:
    if array.dtype != required_dtype:
      raise ValueError(message)

  # Then validate that channel and mask share the image's spatial dimensions.
  shape_requirements = (
      (channel, 'The image has spatial dimensions %s but the channel has '
                'dimensions %s'),
      (mask, 'The image has spatial dimensions %s but the mask has '
             'dimensions %s'),
  )
  for array, message_template in shape_requirements:
    if image.shape[:2] != array.shape:
      raise ValueError(message_template % (image.shape[:2], array.shape))

  colormap = plt.get_cmap(cmap)
  base_image = Image.fromarray(image)
  # Keep only the first three channels of the colormap output.
  channel_rgb = colormap(channel)[:, :, :3]
  overlay_rgba = Image.fromarray(np.uint8(channel_rgb * 255)).convert('RGBA')
  # Every non-background pixel is blended with the same opacity.
  opacity = np.uint8(255.0 * alpha * (mask > 0))
  blend_mask = Image.fromarray(opacity).convert('L')
  blended = Image.composite(overlay_rgba, base_image, blend_mask)
  np.copyto(image, np.array(blended.convert('RGB')))
def visualize_boxes_and_labels_on_image_array(
image,
boxes,
......@@ -973,8 +1120,8 @@ def visualize_boxes_and_labels_on_image_array(
boxes and plot all boxes as black with no classes or scores.
category_index: a dict containing category dictionaries (each holding
category index `id` and category name `name`) keyed by category indices.
instance_masks: a numpy array of shape [N, image_height, image_width] with
values ranging between 0 and 1, can be None.
instance_masks: a uint8 numpy array of shape [N, image_height, image_width],
can be None.
instance_boundaries: a numpy array of shape [N, image_height, image_width]
with values ranging between 0 and 1, can be None.
keypoints: a numpy array of shape [N, num_keypoints, 2], can
......
......@@ -373,6 +373,38 @@ class VisualizationUtilsTest(test_case.TestCase):
color='Blue', alpha=.5)
self.assertAllEqual(test_image, expected_result)
def test_draw_part_mask_on_image_array(self):
  """Checks that part masks color foreground pixels and skip background."""
  # All-black 2x2 RGB image.
  canvas = np.zeros((2, 2, 3), dtype=np.uint8)
  # One background pixel, two pixels of part 1 and one pixel of part 6.
  part_mask = np.asarray([[0, 1],
                          [1, 6]], dtype=np.uint8)
  visualization_utils.draw_part_mask_on_image_array(
      canvas, part_mask, alpha=.5)

  # The background pixel is left untouched.
  self.assertAllEqual([0, 0, 0], canvas[0, 0])
  # Every foreground pixel has all channels above zero.
  for row, col in ((0, 1), (1, 0), (1, 1)):
    self.assertAllGreater(canvas[row, col], 0)
  # Pixels that belong to the same part end up with the same color.
  self.assertAllEqual(canvas[0, 1], canvas[1, 0])
def test_draw_float_channel_on_image_array(self):
  """Checks the colors produced when drawing a float channel with 'bwr'."""
  # All-black 2x2 RGB image.
  canvas = np.zeros((2, 2, 3), dtype=np.uint8)
  float_channel = np.asarray([[0., 0.5],
                              [0., 1.]], dtype=np.float32)
  # The top-left pixel is background and must stay untouched.
  foreground_mask = np.asarray([[0, 1],
                                [1, 1]], dtype=np.uint8)

  # The colormap ('bwr') maps the values as follows:
  #   0.0 -> Blue
  #   0.5 -> White
  #   1.0 -> Red
  visualization_utils.draw_float_channel_on_image_array(
      canvas, float_channel, foreground_mask, alpha=1.0, cmap='bwr')

  expected_result = np.asarray(
      [[[0, 0, 0], [255, 254, 254]],
       [[0, 0, 255], [255, 0, 0]]],
      dtype=np.uint8)
  self.assertAllEqual(canvas, expected_result)
def test_draw_heatmaps_on_image(self):
test_image = self.create_colorful_test_image()
test_image = Image.fromarray(test_image)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment