Commit 2e9bb539 authored by stephenwu

Merge branch 'master' of https://github.com/tensorflow/models into RTESuperGLUE

parents 7bae5317 8fba84f8
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding box utils."""
import math
import tensorflow as tf
def yxyx_to_xcycwh(box: tf.Tensor):
"""Converts boxes from ymin, xmin, ymax, xmax.
to x_center, y_center, width, height.
Args:
box: `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in ymin, xmin, ymax, xmax.
Returns:
`Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('yxyx_to_xcycwh'):
ymin, xmin, ymax, xmax = tf.split(box, 4, axis=-1)
x_center = (xmax + xmin) / 2
y_center = (ymax + ymin) / 2
width = xmax - xmin
height = ymax - ymin
box = tf.concat([x_center, y_center, width, height], axis=-1)
return box
def xcycwh_to_yxyx(box: tf.Tensor, split_min_max: bool = False):
"""Converts boxes from x_center, y_center, width, height.
to ymin, xmin, ymax, xmax.
Args:
box: a `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in x_center, y_center, width, height.
split_min_max: bool, whether or not to split x, y min and max values.
Returns:
box: a `Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('xcycwh_to_yxyx'):
xy, wh = tf.split(box, 2, axis=-1)
xy_min = xy - wh / 2
xy_max = xy + wh / 2
x_min, y_min = tf.split(xy_min, 2, axis=-1)
x_max, y_max = tf.split(xy_max, 2, axis=-1)
box = tf.concat([y_min, x_min, y_max, x_max], axis=-1)
if split_min_max:
box = tf.split(box, 2, axis=-1)
return box
def xcycwh_to_xyxy(box: tf.Tensor, split_min_max: bool = False):
"""Converts boxes from x_center, y_center, width, height to.
xmin, ymin, xmax, ymax.
Args:
box: a `Tensor` whose shape is [..., 4] and represents the
coordinates of boxes in x_center, y_center, width, height.
split_min_max: bool, whether or not to split x, y min and max values.
Returns:
box: a `Tensor` whose shape is [..., 4] and contains the new format.
Raises:
ValueError: If the last dimension of box is not 4 or if box's dtype isn't
a floating point type.
"""
with tf.name_scope('xcycwh_to_xyxy'):
xy, wh = tf.split(box, 2, axis=-1)
xy_min = xy - wh / 2
xy_max = xy + wh / 2
box = (xy_min, xy_max)
if not split_min_max:
box = tf.concat(box, axis=-1)
return box
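# A minimal usage sketch (the `_demo_box_format_conversion` helper is an
# illustrative addition, not part of the original module). It shows that the
# conversions above are inverses of each other for a normalized box.
def _demo_box_format_conversion():
  box = tf.constant([[0.1, 0.2, 0.5, 0.8]])  # ymin, xmin, ymax, xmax
  xcycwh = yxyx_to_xcycwh(box)  # -> [[0.5, 0.3, 0.6, 0.4]]
  return xcycwh_to_yxyx(xcycwh)  # recovers [[0.1, 0.2, 0.5, 0.8]]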
def center_distance(center_1: tf.Tensor, center_2: tf.Tensor):
"""Calculates the squared distance between two points.
This function is mathematically equivalent to the following code, but has
smaller rounding errors.
tf.norm(center_1 - center_2, axis=-1)**2
Args:
center_1: a `Tensor` whose shape is [..., 2] and represents a point.
center_2: a `Tensor` whose shape is [..., 2] and represents a point.
Returns:
dist: a `Tensor` whose shape is [...] and value represents the squared
distance between center_1 and center_2.
Raises:
ValueError: If the last dimension of either center_1 or center_2 is not 2.
"""
with tf.name_scope('center_distance'):
dist = (center_1[..., 0] - center_2[..., 0])**2 + (center_1[..., 1] -
center_2[..., 1])**2
return dist
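# A minimal usage sketch (the `_demo_center_distance` helper is an
# illustrative addition, not part of the original module). It checks the
# docstring's claim on a 3-4-5 triangle: the result is the squared distance.
def _demo_center_distance():
  p1 = tf.constant([[0.0, 0.0]])
  p2 = tf.constant([[3.0, 4.0]])
  return center_distance(p1, p2)  # [25.0] == tf.norm(p1 - p2, axis=-1)**2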
def compute_iou(box1, box2, yxyx=False):
"""Calculates the intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
yxyx: `bool`, whether box1 and box2 are in yxyx format.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the intersection
over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
# Get box corners
with tf.name_scope('iou'):
if not yxyx:
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
b1mi, b1ma = tf.split(box1, 2, axis=-1)
b2mi, b2ma = tf.split(box2, 2, axis=-1)
intersect_mins = tf.math.maximum(b1mi, b2mi)
intersect_maxes = tf.math.minimum(b1ma, b2ma)
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = tf.reduce_prod(
intersect_wh, axis=-1) # intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(tf.reduce_prod(b1ma - b1mi, axis=-1))
box2_area = tf.math.abs(tf.reduce_prod(b2ma - b2mi, axis=-1))
union = box1_area + box2_area - intersection
iou = intersection / (union + 1e-7)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
return iou
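# A minimal usage sketch (the `_demo_compute_iou` helper is an illustrative
# addition, not part of the original module). Identical boxes give an IOU of
# ~1.0 and disjoint boxes give 0.0; inputs are in xcycwh format by default.
def _demo_compute_iou():
  box_a = tf.constant([[0.3, 0.3, 0.2, 0.2]])
  box_b = tf.constant([[0.8, 0.8, 0.1, 0.1]])
  return compute_iou(box_a, box_a), compute_iou(box_a, box_b)  # ~1.0, 0.0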
def compute_giou(box1, box2):
"""Calculates the generalized intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
giou: a `Tensor` whose shape is [...] and value represents the
generalized intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('giou'):
# get box corners
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
# compute IOU
intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(
tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
box2_area = tf.math.abs(
tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
union = box1_area + box2_area - intersection
iou = tf.math.divide_no_nan(intersection, union)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
# find the smallest box that encompasses both box1 and box2
c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
c = tf.math.abs(tf.reduce_prod(c_mins - c_maxes, axis=-1))
# compute giou
giou = iou - tf.math.divide_no_nan((c - union), c)
return iou, giou
def compute_diou(box1, box2):
"""Calculates the distance intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
diou: a `Tensor` whose shape is [...] and value represents the distance
intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('diou'):
# compute center distance
dist = center_distance(box1[..., 0:2], box2[..., 0:2])
# get box corners
box1 = xcycwh_to_yxyx(box1)
box2 = xcycwh_to_yxyx(box2)
# compute IOU
intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
tf.zeros_like(intersect_mins))
intersection = intersect_wh[..., 0] * intersect_wh[..., 1]
box1_area = tf.math.abs(
tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
box2_area = tf.math.abs(
tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
union = box1_area + box2_area - intersection
iou = tf.math.divide_no_nan(intersection, union)
iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)
# compute the squared diagonal of the smallest enclosing box
c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)
regularization = tf.math.divide_no_nan(dist, diag_dist)
# DIOU subtracts the normalized center-distance penalty from the IOU
# (Zheng et al., 2020).
diou = iou - regularization
return iou, diou
def compute_ciou(box1, box2):
"""Calculates the complete intersection of union between box1 and box2.
Args:
box1: a `Tensor` whose shape is [..., 4] and represents the coordinates
of boxes in x_center, y_center, width, height.
box2: a `Tensor` whose shape is [..., 4] and represents the coordinates of
boxes in x_center, y_center, width, height.
Returns:
iou: a `Tensor` whose shape is [...] and value represents the
intersection over union.
ciou: a `Tensor` whose shape is [...] and value represents the complete
intersection over union.
Raises:
ValueError: If the last dimension of either box1 or box2 is not 4.
"""
with tf.name_scope('ciou'):
# compute DIOU and IOU
iou, diou = compute_diou(box1, box2)
# compute aspect ratio consistency
arcterm = (
tf.math.atan(tf.math.divide_no_nan(box1[..., 2], box1[..., 3])) -
tf.math.atan(tf.math.divide_no_nan(box2[..., 2], box2[..., 3])))**2
v = 4 * arcterm / (math.pi)**2
# compute IOU regularization
a = tf.math.divide_no_nan(v, ((1 - iou) + v))
# CIOU subtracts the weighted aspect-ratio penalty from the DIOU
# (Zheng et al., 2020).
ciou = diou - v * a
return iou, ciou
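# A minimal usage sketch (the `_demo_iou_variants` helper is an illustrative
# addition, not part of the original module). For identical boxes the
# enclosing-box, center-distance, and aspect-ratio penalties all vanish, so
# GIOU, DIOU, and CIOU each reduce to the plain IOU of ~1.0.
def _demo_iou_variants():
  box = tf.constant([[0.5, 0.5, 0.4, 0.4]])
  _, giou = compute_giou(box, box)
  _, diou = compute_diou(box, box)
  _, ciou = compute_ciou(box, box)
  return giou, diou, ciou  # each ~[1.0]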
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import box_ops
class InputUtilsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters((1), (4))
def test_box_conversions(self, num_boxes):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
expected_shape = np.array([num_boxes, 4])
xywh_box = box_ops.yxyx_to_xcycwh(boxes)
yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
xyxy_box = box_ops.xcycwh_to_xyxy(boxes)
self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
self.assertAllEqual(tf.shape(xyxy_box).numpy(), expected_shape)
@parameterized.parameters((1), (5), (7))
def test_ious(self, num_boxes):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
expected_shape = np.array([
num_boxes,
])
expected_iou = np.ones([
num_boxes,
])
iou = box_ops.compute_iou(boxes, boxes)
_, giou = box_ops.compute_giou(boxes, boxes)
_, ciou = box_ops.compute_ciou(boxes, boxes)
_, diou = box_ops.compute_diou(boxes, boxes)
self.assertAllEqual(tf.shape(iou).numpy(), expected_shape)
self.assertArrayNear(iou, expected_iou, 0.001)
self.assertArrayNear(giou, expected_iou, 0.001)
self.assertArrayNear(ciou, expected_iou, 0.001)
self.assertArrayNear(diou, expected_iou, 0.001)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Yolo preprocess ops."""
import tensorflow as tf
import tensorflow_addons as tfa
from official.vision.beta.projects.yolo.ops import box_ops
def resize_crop_filter(image, boxes, default_width, default_height,
target_width, target_height):
"""Apply zooming to the image and boxes.
Args:
image: a `Tensor` representing the image.
boxes: a `Tensor` representing the boxes.
default_width: a `Tensor` representing the width of the image.
default_height: a `Tensor` representing the height of the image.
target_width: a `Tensor` representing the desired width of the image.
target_height: a `Tensor` representing the desired height of the image.
Returns:
images: a `Tensor` representing the augmented image.
boxes: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('resize_crop_filter'):
image = tf.image.resize(image, (target_width, target_height))
image = tf.image.resize_with_crop_or_pad(image,
target_height=default_height,
target_width=default_width)
default_width = tf.cast(default_width, boxes.dtype)
default_height = tf.cast(default_height, boxes.dtype)
target_width = tf.cast(target_width, boxes.dtype)
target_height = tf.cast(target_height, boxes.dtype)
aspect_change_width = target_width / default_width
aspect_change_height = target_height / default_height
x, y, width, height = tf.split(boxes, 4, axis=-1)
x = (x - 0.5) * target_width / default_width + 0.5
y = (y - 0.5) * target_height / default_height + 0.5
width = width * aspect_change_width
height = height * aspect_change_height
boxes = tf.concat([x, y, width, height], axis=-1)
return image, boxes
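# A minimal usage sketch (the `_demo_resize_crop_filter` helper is an
# illustrative addition, not part of the original module). It zooms a
# 416x416 image down to 300x300, pads it back to 416x416, and rescales the
# normalized xcycwh boxes to match; the sizes are arbitrary example values.
def _demo_resize_crop_filter():
  image = tf.zeros([416, 416, 3])
  boxes = tf.constant([[0.5, 0.5, 0.2, 0.2]])
  return resize_crop_filter(image, boxes, 416, 416, 300, 300)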
def random_translate(image, box, t, seed=None):
"""Randomly translate the image and boxes.
Args:
image: a `Tensor` representing the image.
box: a `Tensor` representing the boxes.
t: a number giving the maximum translation offset; the actual offsets
are drawn uniformly from [-t, t].
seed: an optional seed for tf.random operations.
Returns:
image: a `Tensor` representing the augmented image.
box: a `Tensor` representing the augmented boxes.
"""
t_x = tf.random.uniform(minval=-t,
maxval=t,
shape=(),
dtype=tf.float32,
seed=seed)
t_y = tf.random.uniform(minval=-t,
maxval=t,
shape=(),
dtype=tf.float32,
seed=seed)
box = translate_boxes(box, t_x, t_y)
image = translate_image(image, t_x, t_y)
return image, box
def translate_boxes(box, translate_x, translate_y):
"""Randomly translate the boxes.
Args:
box: a `Tensor` represeitng the boxes.
translate_x: a `Tensor` represting the translation on the x-axis.
translate_y: a `Tensor` represting the translation on the y-axis.
Returns:
box: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('translate_boxes'):
x = box[..., 0] + translate_x
y = box[..., 1] + translate_y
box = tf.stack([x, y, box[..., 2], box[..., 3]], axis=-1)
box.set_shape([None, 4])
return box
def translate_image(image, translate_x, translate_y):
"""Randomly translate the image.
Args:
image: a `Tensor` representing the image.
translate_x: a `Tensor` represting the translation on the x-axis.
translate_y: a `Tensor` represting the translation on the y-axis.
Returns:
box: a `Tensor` representing the augmented boxes.
"""
with tf.name_scope('translate_image'):
if translate_x != 0 or translate_y != 0:
image_jitter = tf.convert_to_tensor([translate_x, translate_y])
image_jitter.set_shape([2])
image = tfa.image.translate(
image, image_jitter * tf.cast(tf.shape(image)[1], tf.float32))
return image
def pad_max_instances(value, instances, pad_value=0, pad_axis=0):
"""Pads tensors to max number of instances."""
shape = tf.shape(value)
dim1 = shape[pad_axis]
take = tf.math.reduce_min([instances, dim1])
value, _ = tf.split(value, [take, -1],
axis=pad_axis) # value[:instances, ...]
pad = tf.convert_to_tensor([tf.math.reduce_max([instances - dim1, 0])])
nshape = tf.concat([shape[:pad_axis], pad, shape[(pad_axis + 1):]], axis=0)
pad_tensor = tf.fill(nshape, tf.cast(pad_value, dtype=value.dtype))
value = tf.concat([value, pad_tensor], axis=pad_axis)
return value
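# A minimal usage sketch (the `_demo_pad_max_instances` helper is an
# illustrative addition, not part of the original module). A [2, 4] box
# tensor is zero-padded up to 5 instances along axis 0; a tensor with more
# than `instances` rows would be truncated instead.
def _demo_pad_max_instances():
  boxes = tf.ones([2, 4])
  return pad_max_instances(boxes, 5)  # shape [5, 4]; the last 3 rows are 0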
def fit_preserve_aspect_ratio(image,
boxes,
width=None,
height=None,
target_dim=None):
"""Resizes the image while peserving the image aspect ratio.
Args:
image: a `Tensor` representing the image.
boxes: a `Tensor` representing the boxes.
width: int for the image width.
height: int for the image height.
target_dim: scalar `int` or `Tensor` for the output height and width;
the output image is square.
Returns:
image: a `Tensor` representing the image.
box: a `Tensor` representing the boxes.
"""
if width is None or height is None:
shape = tf.shape(image)
if tf.shape(shape)[0] == 4:
width = shape[1]
height = shape[2]
else:
width = shape[0]
height = shape[1]
clipper = tf.math.maximum(width, height)
if target_dim is None:
target_dim = clipper
pad_width = clipper - width
pad_height = clipper - height
image = tf.image.pad_to_bounding_box(image, pad_width // 2, pad_height // 2,
clipper, clipper)
boxes = box_ops.yxyx_to_xcycwh(boxes)
x, y, w, h = tf.split(boxes, 4, axis=-1)
y *= tf.cast(width / clipper, tf.float32)
x *= tf.cast(height / clipper, tf.float32)
y += tf.cast((pad_width / clipper) / 2, tf.float32)
x += tf.cast((pad_height / clipper) / 2, tf.float32)
h *= tf.cast(width / clipper, tf.float32)
w *= tf.cast(height / clipper, tf.float32)
boxes = tf.concat([x, y, w, h], axis=-1)
boxes = box_ops.xcycwh_to_yxyx(boxes)
image = tf.image.resize(image, (target_dim, target_dim))
return image, boxes
def get_best_anchor(y_true, anchors, width=1, height=1):
"""Gets the correct anchor that is assoiciated with each box using IOU.
Args:
y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
anchors: list or tensor for the anchor boxes to be used in prediction,
found via k-means.
width: int for the image width.
height: int for the image height.
Returns:
tf.Tensor: y_true with the anchor associated with each ground truth
box known.
"""
with tf.name_scope('get_anchor'):
width = tf.cast(width, dtype=tf.float32)
height = tf.cast(height, dtype=tf.float32)
# split the boxes into center and width height
anchor_xy = y_true[..., 0:2]
# scale the boxes
anchors = tf.convert_to_tensor(anchors, dtype=tf.float32)
anchors_x = anchors[..., 0] / width
anchors_y = anchors[..., 1] / height
anchors = tf.stack([anchors_x, anchors_y], axis=-1)
k = tf.shape(anchors)[0]
# build a matrix of anchor boxes of shape [num_anchors, num_boxes, 4]
anchors = tf.transpose(anchors, perm=[1, 0])
anchor_xy = tf.tile(tf.expand_dims(anchor_xy, axis=-1),
[1, 1, tf.shape(anchors)[-1]])
anchors = tf.tile(tf.expand_dims(anchors, axis=0),
[tf.shape(anchor_xy)[0], 1, 1])
# stack the xy so that each anchor is associated once with each center
# from the ground truth input
anchors = tf.concat([anchor_xy, anchors], axis=1)
anchors = tf.transpose(anchors, perm=[2, 0, 1])
# copy the gt n times so that each anchor from above can be compared to
# input ground truth to shape: [num_anchors, num_boxes, 4]
truth_comp = tf.tile(tf.expand_dims(y_true[..., 0:4], axis=-1),
[1, 1, tf.shape(anchors)[0]])
truth_comp = tf.transpose(truth_comp, perm=[2, 0, 1])
# compute the intersection over union of the boxes, and take the argmax
# of the computed iou for each box, so that each box is associated with
# the largest intersection over union
iou_raw = box_ops.compute_iou(truth_comp, anchors)
values, indexes = tf.math.top_k(tf.transpose(iou_raw, perm=[1, 0]),
k=tf.cast(k, dtype=tf.int32),
sorted=True)
ind_mask = tf.cast(values > 0.213, dtype=indexes.dtype)
# pad the indexes such that all values less than the thresh are -1:
# add one, multiply by the mask to zero out all the bad locations, then
# subtract 1, making all the bad locations -1.
iou_index = tf.concat([
tf.keras.backend.expand_dims(indexes[..., 0], axis=-1),
((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1
],
axis=-1)
iou_index = iou_index[..., :6]
return tf.cast(iou_index, dtype=tf.float32)
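# A minimal usage sketch (the `_demo_get_best_anchor` helper is an
# illustrative addition, not part of the original module). One normalized
# ground truth box is ranked against three pixel-space anchors; the anchor
# sizes and the 416x416 image size are assumed example values.
def _demo_get_best_anchor():
  y_true = tf.constant([[0.5, 0.5, 0.1, 0.15]])  # xcycwh, normalized
  anchors = [[30., 60.], [60., 45.], [120., 90.]]
  return get_best_anchor(y_true, anchors, width=416, height=416)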
def build_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
"""Converts ground truth for use in loss functions.
Args:
y_true: tf.Tensor[] ground truth
[box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box]
mask: list of the anchor boxes corresponding to the output,
ex. [1, 2, 3] tells this layer to predict only the first 3
anchors in the total.
size: The dimensions of this output, for regular, it progresses
from 13, to 26, to 52.
dtype: The expected output dtype.
use_tie_breaker: boolean value for whether or not to use the tie_breaker.
Returns:
tf.Tensor[] of shape [size, size, num_anchors, 6], packing the box
coordinates (4), confidence (1), and class (1) for each cell.
"""
# unpack required components from the input ground truth
boxes = tf.cast(y_true['bbox'], dtype)
classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
anchors = tf.cast(y_true['best_anchors'], dtype)
# get the number of boxes in the ground truth
num_boxes = tf.shape(boxes)[0]
# get the number of anchor boxes used for this anchor scale
len_masks = tf.shape(mask)[0]
# init a fixed memory size grid for this prediction scale
# [size, size, # of anchors, 1 + 1 + number of anchors per scale]
full = tf.zeros([size, size, len_masks, 6], dtype=dtype)
# init a grid to use to track which locations have already
# been used before (for the tie breaker)
depth_track = tf.zeros((size, size, len_masks), dtype=tf.int32)
# rescale the x and y centers to the size of the grid [size, size]
x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)
# init all the TensorArrays to be used in storing the index
# and the values to be used to update both depth_track and full
update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
update = tf.TensorArray(dtype, size=0, dynamic_size=True)
# init constants and match data types before entering loop
i = 0
anchor_id = 0
const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
mask = tf.cast(mask, dtype=dtype)
rand_update = 0.0
for box_id in range(num_boxes):
# If the width or height of the box is zero, skip it.
# After preprocessing, if the box is no longer inside the image bounds,
# skip it.
if tf.keras.backend.all(tf.math.equal(
boxes[box_id, 2:4], 0)) or tf.keras.backend.any(
tf.math.less(boxes[box_id, 0:2], 0.0)) or tf.keras.backend.any(
tf.math.greater_equal(boxes[box_id, 0:2], 1.0)):
continue
if use_tie_breaker:
for anchor_id in range(tf.shape(anchors)[-1]):
index = tf.math.equal(anchors[box_id, anchor_id], mask)
if tf.keras.backend.any(index):
# using the boolean index mask to determine exactly which
# anchor box was used
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# determine if the index was used or not
used = depth_track[y[box_id], x[box_id], p]
# default used update value
uid = 1
# if anchor_id is 0, this is the best matched anchor for this box
# with the highest IOU
if anchor_id == 0:
# write the box to the update list
# create a random number to trigger a replacement if the cell
# is used already
if tf.math.equal(used, 1):
rand_update = tf.random.uniform([], maxval=1)
else:
rand_update = 1.0
if rand_update > 0.5:
# write the box to the update list
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]],
axis=-1)
update = update.write(i, value)
# if used is 2, this cell is filled with a non-optimal box
# if used is 0, the cell in the ground truth is not yet consumed
# in either case you can replace that cell with a new box, as long
# as it is not consumed by an optimal box with anchor_id = 0
elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
uid = 2
# write the box to the update list
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1)
update = update.write(i, value)
depth_track = tf.tensor_scatter_nd_update(
depth_track, [(y[box_id], x[box_id], p)], [uid])
i += 1
else:
index = tf.math.equal(anchors[box_id, 0], mask)
# if there is any index match
if tf.keras.backend.any(index):
# find the index
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# update the list of used boxes
update_index = update_index.write(i, [y[box_id], x[box_id], p])
value = tf.concat([boxes[box_id], const, classes[box_id]], axis=-1)
update = update.write(i, value)
i += 1
# if the size of the update list is not 0, do an update; otherwise
# there are no boxes, so pass an empty grid
if tf.math.greater(update_index.size(), 0):
update_index = update_index.stack()
update = update.stack()
full = tf.tensor_scatter_nd_update(full, update_index, update)
return full
def build_batch_grided_gt(y_true, mask, size, dtype, use_tie_breaker):
"""Converts ground truth for use in loss functions.
Args:
y_true: tf.Tensor[] ground truth
[batch, box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box]
mask: list of the anchor boxes corresponding to the output,
ex. [1, 2, 3] tells this layer to predict only the first 3 anchors
in the total.
size: the dimensions of this output, for regular, it progresses from
13, to 26, to 52
dtype: expected output datatype
use_tie_breaker: boolean value for whether or not to use the tie
breaker.
Returns:
tf.Tensor[] of shape [batch, size, size, num_anchors, 6], packing the
box coordinates (4), confidence (1), and class (1) for each cell.
"""
# unpack required components from the input ground truth
boxes = tf.cast(y_true['bbox'], dtype)
classes = tf.expand_dims(tf.cast(y_true['classes'], dtype=dtype), axis=-1)
anchors = tf.cast(y_true['best_anchors'], dtype)
# get the batch size
batches = tf.shape(boxes)[0]
# get the number of boxes in the ground truth
num_boxes = tf.shape(boxes)[1]
# get the number of anchor boxes used for this anchor scale
len_masks = tf.shape(mask)[0]
# init a fixed memory size grid for this prediction scale
# [batch, size, size, # of anchors, 1 + 1 + number of anchors per scale]
full = tf.zeros([batches, size, size, len_masks, 1 + 4 + 1], dtype=dtype)
# init a grid to use to track which locations have already
# been used before (for the tie breaker)
depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32)
# rescale the x and y centers to the size of the grid [size, size]
x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)
# init all the TensorArrays to be used in storing the index and the values
# to be used to update both depth_track and full
update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
update = tf.TensorArray(dtype, size=0, dynamic_size=True)
# init constants and match data types before entering loop
i = 0
anchor_id = 0
const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
mask = tf.cast(mask, dtype=dtype)
rand_update = 0.0
for batch in range(batches):
for box_id in range(num_boxes):
# if the width or height of the box is zero, skip it
if tf.keras.backend.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)):
continue
# after preprocessing, if the box is no longer inside the image bounds,
# skip the box
if tf.keras.backend.any(tf.math.less(
boxes[batch, box_id, 0:2], 0.0)) or tf.keras.backend.any(
tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0)):
continue
if use_tie_breaker:
for anchor_id in range(tf.shape(anchors)[-1]):
index = tf.math.equal(anchors[batch, box_id, anchor_id], mask)
if tf.keras.backend.any(index):
# using the boolean index mask to determine exactly which anchor
# box was used
p = tf.cast(tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# determine if the index was used or not
used = depth_track[batch, y[batch, box_id], x[batch, box_id], p]
# default used update value
uid = 1
# if anchor_id is 0, this is the best matched anchor for this box
# with the highest IOU
if anchor_id == 0:
# create a random number to trigger a replacement if the cell
# is used already
if tf.math.equal(used, 1):
rand_update = tf.random.uniform([], maxval=1)
else:
rand_update = 1.0
if rand_update > 0.5:
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]],
axis=-1)
update = update.write(i, value)
# if used is 2, this cell is filled with a non-optimal box
# if used is 0, the cell in the ground truth is not yet consumed
# in either case you can replace that cell with a new box, as long
# as it is not consumed by an optimal box with anchor_id = 0
elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
uid = 2
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]], axis=-1)
update = update.write(i, value)
# update the used index for where and how the box was placed
depth_track = tf.tensor_scatter_nd_update(
depth_track, [(batch, y[batch, box_id], x[batch, box_id], p)],
[uid])
i += 1
else:
index = tf.math.equal(anchors[batch, box_id, 0], mask)
if tf.keras.backend.any(index):
# if there is any index match
p = tf.cast(
tf.keras.backend.argmax(tf.cast(index, dtype=tf.int32)),
dtype=tf.int32)
# write the box to the update list
update_index = update_index.write(
i, [batch, y[batch, box_id], x[batch, box_id], p])
value = tf.concat(
[boxes[batch, box_id], const, classes[batch, box_id]], axis=-1)
update = update.write(i, value)
i += 1
# if the size of the update list is not 0, do an update; otherwise
# there are no boxes, so pass an empty grid
if tf.math.greater(update_index.size(), 0):
update_index = update_index.stack()
update = update.stack()
full = tf.tensor_scatter_nd_update(full, update_index, update)
return full
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.projects.yolo.ops import preprocess_ops
class PreprocessOpsTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters((416, 416, 5, 300, 300), (100, 200, 6, 50, 50))
def test_resize_crop_filter(self, default_width, default_height, num_boxes,
target_width, target_height):
image = tf.convert_to_tensor(
np.random.rand(default_width, default_height, 3))
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
resized_image, resized_boxes = preprocess_ops.resize_crop_filter(
image, boxes, default_width, default_height, target_width,
target_height)
resized_image_shape = tf.shape(resized_image)
resized_boxes_shape = tf.shape(resized_boxes)
self.assertAllEqual([default_height, default_width, 3],
resized_image_shape.numpy())
self.assertAllEqual([num_boxes, 4], resized_boxes_shape.numpy())
@parameterized.parameters((7, 7., 5.), (25, 35., 45.))
def test_translate_boxes(self, num_boxes, translate_x, translate_y):
boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
translated_boxes = preprocess_ops.translate_boxes(
boxes, translate_x, translate_y)
translated_boxes_shape = tf.shape(translated_boxes)
self.assertAllEqual([num_boxes, 4], translated_boxes_shape.numpy())
@parameterized.parameters((100, 200, 75., 25.), (400, 600, 25., 75.))
def test_translate_image(self, image_height, image_width, translate_x,
translate_y):
image = tf.convert_to_tensor(np.random.rand(image_height, image_width, 4))
translated_image = preprocess_ops.translate_image(
image, translate_x, translate_y)
translated_image_shape = tf.shape(translated_image)
self.assertAllEqual([image_height, image_width, 4],
translated_image_shape.numpy())
@parameterized.parameters(([1, 2], 20, 0), ([13, 2, 4], 15, 0))
def test_pad_max_instances(self, input_shape, instances, pad_axis):
expected_output_shape = input_shape
expected_output_shape[pad_axis] = instances
output = preprocess_ops.pad_max_instances(
np.ones(input_shape), instances, pad_axis=pad_axis)
self.assertAllEqual(expected_output_shape, tf.shape(output).numpy())
if __name__ == '__main__':
tf.test.main()
@@ -70,8 +70,6 @@ class DetectionModule(export_base.ExportModule):
aug_scale_min=1.0,
aug_scale_max=1.0)
image_shape = image_info[1, :] # Shape of original image.
input_anchor = anchor.build_anchor_generator(
min_level=model_params.min_level,
max_level=model_params.max_level,
@@ -81,7 +79,7 @@ class DetectionModule(export_base.ExportModule):
anchor_boxes = input_anchor(image_size=(self._input_image_size[0],
self._input_image_size[1]))
return image, anchor_boxes, image_shape
return image, anchor_boxes, image_info
def _run_inference_on_image_tensors(self, images: tf.Tensor):
"""Cast image to float and run inference.
@@ -111,20 +109,22 @@ class DetectionModule(export_base.ExportModule):
dtype=tf.float32)
anchor_shapes.append((str(level), anchor_level_spec))
image_shape_spec = tf.TensorSpec(shape=[2,], dtype=tf.float32)
image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)
images, anchor_boxes, image_shape = tf.nest.map_structure(
images, anchor_boxes, image_info = tf.nest.map_structure(
tf.identity,
tf.map_fn(
self._build_inputs,
elems=images,
fn_output_signature=(images_spec, dict(anchor_shapes),
image_shape_spec),
image_info_spec),
parallel_iterations=32))
input_image_shape = image_info[:, 1, :]
detections = self._model.call(
images=images,
image_shape=image_shape,
image_shape=input_image_shape,
anchor_boxes=anchor_boxes,
training=False)
@@ -132,7 +132,8 @@ class DetectionModule(export_base.ExportModule):
'detection_boxes': detections['detection_boxes'],
'detection_scores': detections['detection_scores'],
'detection_classes': detections['detection_classes'],
'num_detections': detections['num_detections']
'num_detections': detections['num_detections'],
'image_info': image_info
}
if 'detection_masks' in detections.keys():
final_outputs['detection_masks'] = detections['detection_masks']
@@ -125,10 +125,11 @@ class DetectionExportTest(tf.test.TestCase, parameterized.TestCase):
images = self._get_dummy_input(input_type, batch_size, image_size)
processed_images, anchor_boxes, image_shape = module._build_inputs(
processed_images, anchor_boxes, image_info = module._build_inputs(
tf.zeros((224, 224, 3), dtype=tf.uint8))
processed_images = tf.expand_dims(processed_images, 0)
image_shape = image_info[1, :]
image_shape = tf.expand_dims(image_shape, 0)
processed_images = tf.expand_dims(processed_images, 0)
for l, l_boxes in anchor_boxes.items():
anchor_boxes[l] = tf.expand_dims(l_boxes, 0)
@@ -16,33 +16,30 @@
"""Base class for model export."""
import abc
import tensorflow as tf
def _decode_image(encoded_image_bytes):
image_tensor = tf.image.decode_image(encoded_image_bytes, channels=3)
image_tensor.set_shape((None, None, 3))
return image_tensor
from typing import Optional, Sequence, Mapping
import tensorflow as tf
def _decode_tf_example(tf_example_string_tensor):
keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
parsed_tensors = tf.io.parse_single_example(
serialized=tf_example_string_tensor, features=keys_to_features)
image_tensor = _decode_image(parsed_tensors['image/encoded'])
return image_tensor
from official.modeling.hyperparams import config_definitions as cfg
class ExportModule(tf.Module, metaclass=abc.ABCMeta):
"""Base Export Module."""
def __init__(self, params, batch_size, input_image_size, model=None):
def __init__(self,
params: cfg.ExperimentConfig,
batch_size: int,
input_image_size: Sequence[int],
num_channels: int = 3,
model: Optional[tf.keras.Model] = None):
"""Initializes a module for export.
Args:
params: Experiment params.
batch_size: Int or None.
input_image_size: List or Tuple of height, width of the input image.
batch_size: The batch size of the model input. Can be `int` or None.
input_image_size: List or Tuple of size of the input image. For 2D image,
it is [height, width].
num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported.
"""
@@ -50,48 +47,98 @@ class ExportModule(tf.Module, metaclass=abc.ABCMeta):
self._params = params
self._batch_size = batch_size
self._input_image_size = input_image_size
self._num_channels = num_channels
self._model = model
def _decode_image(self, encoded_image_bytes: str) -> tf.Tensor:
"""Decodes an image bytes to an image tensor.
Use `tf.image.decode_image` to decode an image if the input is expected to
be a 2D image; otherwise use `tf.io.decode_raw` to convert the raw bytes to
a tensor and reshape it to the desired shape.
Args:
encoded_image_bytes: An encoded image string to be decoded.
Returns:
A decoded image tensor.
"""
if len(self._input_image_size) == 2:
# Decode an image if 2D input is expected.
image_tensor = tf.image.decode_image(
encoded_image_bytes, channels=self._num_channels)
image_tensor.set_shape((None, None, self._num_channels))
else:
# Convert raw bytes into a tensor and reshape it, if not 2D input.
image_tensor = tf.io.decode_raw(encoded_image_bytes, out_type=tf.uint8)
image_tensor = tf.reshape(image_tensor,
self._input_image_size + [self._num_channels])
return image_tensor
def _decode_tf_example(
self, tf_example_string_tensor: tf.train.Example) -> tf.Tensor:
"""Decodes a TF Example to an image tensor.
Args:
tf_example_string_tensor: A tf.train.Example of encoded image and other
information.
Returns:
A decoded image tensor.
"""
keys_to_features = {'image/encoded': tf.io.FixedLenFeature((), tf.string)}
parsed_tensors = tf.io.parse_single_example(
serialized=tf_example_string_tensor, features=keys_to_features)
image_tensor = self._decode_image(parsed_tensors['image/encoded'])
return image_tensor
@abc.abstractmethod
def build_model(self):
def build_model(self, **kwargs):
"""Builds model and sets self._model."""
@abc.abstractmethod
def _run_inference_on_image_tensors(self, images):
def _run_inference_on_image_tensors(
self, images: tf.Tensor) -> Mapping[str, tf.Tensor]:
"""Runs inference on images."""
@tf.function
def inference_from_image_tensors(self, input_tensor):
def inference_from_image_tensors(
self, input_tensor: tf.Tensor) -> Mapping[str, tf.Tensor]:
return self._run_inference_on_image_tensors(input_tensor)
@tf.function
def inference_from_image_bytes(self, input_tensor):
def inference_from_image_bytes(self, input_tensor: str):
with tf.device('cpu:0'):
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
_decode_image,
self._decode_image,
elems=input_tensor,
fn_output_signature=tf.TensorSpec(
shape=[None, None, 3], dtype=tf.uint8),
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
parallel_iterations=32))
images = tf.stack(images)
return self._run_inference_on_image_tensors(images)
@tf.function
def inference_from_tf_example(self, input_tensor):
def inference_from_tf_example(
self, input_tensor: tf.train.Example) -> Mapping[str, tf.Tensor]:
with tf.device('cpu:0'):
images = tf.nest.map_structure(
tf.identity,
tf.map_fn(
_decode_tf_example,
self._decode_tf_example,
elems=input_tensor,
# Height/width of the shape of input images is unspecified (None)
# at the time of decoding the example, but the shape will
# be adjusted to conform to the input layer of the model,
# by _run_inference_on_image_tensors() below.
fn_output_signature=tf.TensorSpec(
shape=[None, None, 3], dtype=tf.uint8),
shape=[None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8),
dtype=tf.uint8,
parallel_iterations=32))
images = tf.stack(images)
@@ -17,19 +17,30 @@ r"""Vision models export utility function for serving/inference."""
import os
from typing import Optional, List
import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import train_utils
from official.vision.beta import configs
from official.vision.beta.serving import detection
from official.vision.beta.serving import export_base
from official.vision.beta.serving import image_classification
from official.vision.beta.serving import semantic_segmentation
def export_inference_graph(input_type, batch_size, input_image_size, params,
checkpoint_path, export_dir,
export_checkpoint_subdir=None,
export_saved_model_subdir=None):
def export_inference_graph(
input_type: str,
batch_size: Optional[int],
input_image_size: List[int],
params: cfg.ExperimentConfig,
checkpoint_path: str,
export_dir: str,
num_channels: Optional[int] = 3,
export_module: Optional[export_base.ExportModule] = None,
export_checkpoint_subdir: Optional[str] = None,
export_saved_model_subdir: Optional[str] = None):
"""Exports inference graph for the model specified in the exp config.
Saved model is stored at export_dir/saved_model, checkpoint is saved
@@ -42,6 +53,10 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
params: Experiment params.
checkpoint_path: Trained checkpoint path or directory.
export_dir: Export directory path.
num_channels: The number of input image channels.
export_module: Optional export module to be used instead of using params
to create one. If None, the params will be used to create an export
module.
export_checkpoint_subdir: Optional subdirectory under export_dir
to store checkpoint.
export_saved_model_subdir: Optional subdirectory under export_dir
@@ -60,21 +75,31 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
else:
output_saved_model_directory = export_dir
if isinstance(params.task,
configs.image_classification.ImageClassificationTask):
export_module = image_classification.ClassificationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
else:
raise ValueError('Export module not implemented for {} task.'.format(
type(params.task)))
if not export_module:
if isinstance(params.task,
configs.image_classification.ImageClassificationTask):
export_module = image_classification.ClassificationModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule(
params=params,
batch_size=batch_size,
input_image_size=input_image_size,
num_channels=num_channels)
else:
raise ValueError('Export module not implemented for {} task.'.format(
type(params.task)))
model = export_module.build_model()
@@ -87,7 +112,7 @@ def export_inference_graph(input_type, batch_size, input_image_size, params,
if input_type == 'image_tensor':
input_signature = tf.TensorSpec(
shape=[batch_size, None, None, 3],
shape=[batch_size] + [None] * len(input_image_size) + [num_channels],
dtype=tf.uint8)
signatures = {
'serving_default':
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A script to export the image classification as a TF-Hub SavedModel."""
# Import libraries
from absl import app
from absl import flags
import tensorflow as tf
from official.common import registry_imports # pylint: disable=unused-import
from official.core import exp_factory
from official.modeling import hyperparams
from official.vision.beta.serving import image_classification
FLAGS = flags.FLAGS
flags.DEFINE_string(
'experiment', None, 'experiment type, e.g. resnet_imagenet')
flags.DEFINE_string(
'checkpoint_path', None, 'Checkpoint path.')
flags.DEFINE_string(
'export_path', None, 'The export directory.')
flags.DEFINE_multi_string(
'config_file',
None,
'A YAML/JSON files which specifies overrides. The override order '
'follows the order of args. Note that each file '
'can be used as an override template to override the default parameters '
'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.')
flags.DEFINE_string(
'params_override', '',
'The JSON/YAML file or string which specifies the parameter to be overridden'
' on top of `config_file` template.')
flags.DEFINE_integer(
'batch_size', None, 'The batch size.')
flags.DEFINE_string(
'input_image_size',
'224,224',
'The comma-separated string of two integers representing the height, width '
'of the input to the model.')
flags.DEFINE_boolean(
'skip_logits_layer',
False,
'Whether to skip the prediction layer and only output the feature vector.')
def export_model_to_tfhub(params,
batch_size,
input_image_size,
skip_logits_layer,
checkpoint_path,
export_path):
"""Export an image classification model to TF-Hub."""
export_module = image_classification.ClassificationModule(
params=params, batch_size=batch_size, input_image_size=input_image_size)
model = export_module.build_model(skip_logits_layer=skip_logits_layer)
checkpoint = tf.train.Checkpoint(model=model)
checkpoint.restore(checkpoint_path).assert_existing_objects_matched()
model.save(export_path, include_optimizer=False, save_format='tf')
def main(_):
params = exp_factory.get_exp_config(FLAGS.experiment)
for config_file in FLAGS.config_file or []:
params = hyperparams.override_params_dict(
params, config_file, is_strict=True)
if FLAGS.params_override:
params = hyperparams.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params.validate()
params.lock()
export_model_to_tfhub(
params=params,
batch_size=FLAGS.batch_size,
input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')],
skip_logits_layer=FLAGS.skip_logits_layer,
checkpoint_path=FLAGS.checkpoint_path,
export_path=FLAGS.export_path)
if __name__ == '__main__':
app.run(main)
@@ -29,14 +29,15 @@ STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
class ClassificationModule(export_base.ExportModule):
"""classification Module."""
def build_model(self):
def build_model(self, skip_logits_layer=False):
input_specs = tf.keras.layers.InputSpec(
shape=[self._batch_size] + self._input_image_size + [3])
self._model = factory.build_classification_model(
input_specs=input_specs,
model_config=self._params.task.model,
l2_regularizer=None)
l2_regularizer=None,
skip_logits_layer=skip_logits_layer)
return self._model
@@ -16,13 +16,14 @@
"""Image classification task definition."""
from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import image_classification as exp_cfg
from official.vision.beta.dataloaders import classification_input
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import tfds_classification_decoders
from official.vision.beta.modeling import factory
@@ -93,9 +94,10 @@ class ImageClassificationTask(base_task.Task):
output_size=input_size[:2],
num_classes=num_classes,
aug_policy=params.aug_policy,
randaug_magnitude=params.randaug_magnitude,
dtype=params.dtype)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import maskrcnn as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import maskrcnn_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tf_example_label_map_decoder
@@ -143,7 +143,7 @@ class MaskRCNNTask(base_task.Task):
include_mask=self._task_config.model.include_mask,
mask_crop_size=params.parser.mask_crop_size)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,10 +19,10 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision import keras_cv
from official.vision.beta.configs import retinanet as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import retinanet_input
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tfds_detection_decoders
@@ -122,7 +122,7 @@ class RetinaNetTask(base_task.Task):
skip_crowd_during_training=params.parser.skip_crowd_during_training,
max_num_instances=params.parser.max_num_instances)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -19,9 +19,9 @@ from absl import logging
import tensorflow as tf
from official.common import dataset_fn
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.vision.beta.configs import semantic_segmentation as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import segmentation_input
from official.vision.beta.dataloaders import tfds_segmentation_decoders
from official.vision.beta.evaluation import segmentation_metrics
@@ -104,7 +104,7 @@ class SemanticSegmentationTask(base_task.Task):
aug_rand_hflip=params.aug_rand_hflip,
dtype=params.dtype)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
decoder_fn=decoder.decode,
@@ -143,15 +143,15 @@ class SemanticSegmentationTask(base_task.Task):
def build_metrics(self, training=True):
"""Gets streaming metrics for training/validation."""
metrics = []
if training:
if training and self.task_config.evaluation.report_train_mean_iou:
metrics.append(segmentation_metrics.MeanIoU(
name='mean_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=False,
dtype=tf.float32))
else:
self.miou_metric = segmentation_metrics.MeanIoU(
name='val_mean_iou',
self.iou_metric = segmentation_metrics.PerClassIoU(
name='per_class_iou',
num_classes=self.task_config.model.num_classes,
rescale_predictions=not self.task_config.validation_data
.resize_eval_groundtruth,
@@ -243,7 +243,7 @@ class SemanticSegmentationTask(base_task.Task):
loss = 0
logs = {self.loss: loss}
logs.update({self.miou_metric.name: (labels, outputs)})
logs.update({self.iou_metric.name: (labels, outputs)})
if metrics:
self.process_metrics(metrics, labels, outputs)
@@ -257,11 +257,19 @@ class SemanticSegmentationTask(base_task.Task):
def aggregate_logs(self, state=None, step_outputs=None):
if state is None:
self.miou_metric.reset_states()
state = self.miou_metric
self.miou_metric.update_state(step_outputs[self.miou_metric.name][0],
step_outputs[self.miou_metric.name][1])
self.iou_metric.reset_states()
state = self.iou_metric
self.iou_metric.update_state(step_outputs[self.iou_metric.name][0],
step_outputs[self.iou_metric.name][1])
return state
def reduce_aggregated_logs(self, aggregated_logs):
return {self.miou_metric.name: self.miou_metric.result().numpy()}
result = {}
ious = self.iou_metric.result()
# TODO(arashwan): support loading class name from a label map file.
if self.task_config.evaluation.report_per_class_iou:
for i, value in enumerate(ious.numpy()):
result.update({'iou/{}'.format(i): value})
# Computes mean IoU
result.update({'mean_iou': tf.reduce_mean(ious).numpy()})
return result
@@ -17,10 +17,10 @@
from absl import logging
import tensorflow as tf
from official.core import base_task
from official.core import input_reader
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.dataloaders import video_input
from official.vision.beta.modeling import factory_3d
@@ -74,7 +74,7 @@ class VideoClassificationTask(base_task.Task):
parser = video_input.Parser(input_params=params)
postprocess_fn = video_input.PostBatchProcessor(params)
reader = input_reader.InputReader(
reader = input_reader_factory.input_reader_generator(
params,
dataset_fn=self._get_dataset_fn(params),
decoder_fn=self._get_decoder_fn(params),
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Normalization layer definitions."""
import tensorflow as tf
class L2Normalization(tf.keras.layers.Layer):
"""Normalization layer using L2 norm."""
def __init__(self):
"""Initialization of the L2Normalization layer."""
super(L2Normalization, self).__init__()
# A lower bound value for the norm.
self.eps = 1e-6
def call(self, x, axis=1):
"""Invokes the L2Normalization instance.
Args:
x: A Tensor.
axis: Dimension along which to normalize. A scalar or a vector of
integers.
Returns:
norm: A Tensor with the same shape as `x`.
"""
return tf.nn.l2_normalize(x, axis, epsilon=self.eps)
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,40 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.tools.build_docs."""
import os
import shutil
"""Tests for normalization layers."""
import tensorflow as tf
from official.utils.docs import build_docs
class BuildDocsTest(tf.test.TestCase):
from delf.python.normalization_layers import normalization
def setUp(self):
super(BuildDocsTest, self).setUp()
self.workdir = self.get_temp_dir()
if os.path.exists(self.workdir):
shutil.rmtree(self.workdir)
os.makedirs(self.workdir)
def test_api_gen(self):
build_docs.gen_api_docs(
code_url_prefix="http://official/nlp/modeling/",
site_path="tf_nlp_modeling/api_docs/python",
output_dir=self.workdir,
gen_report=False,
project_short_name="tf_nlp_modeling",
project_full_name="TensorFlow Modeling - NLP Library",
search_hints=True)
class NormalizationsTest(tf.test.TestCase):
# Check that the "defined in" section is working
with open(os.path.join(self.workdir, "tf_nlp_modeling.md")) as f:
content = f.read()
self.assertIn("__init__.py", content)
def testL2Normalization(self):
x = tf.constant([-4.0, 0.0, 4.0])
layer = normalization.L2Normalization()
# Run tested function.
result = layer(x, axis=0)
# Define expected result.
exp_output = [-0.70710677, 0.0, 0.70710677]
# Compare actual and expected.
self.assertAllClose(exp_output, result)
if __name__ == "__main__":
if __name__ == '__main__':
tf.test.main()
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
\ No newline at end of file
# Copyright 2021 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pooling layers definitions."""
import tensorflow as tf
class MAC(tf.keras.layers.Layer):
"""Global max pooling (MAC) layer.
Maximum Activations of Convolutions (MAC) is constructed by max-pooling
over the spatial dimensions of each feature map. See
https://arxiv.org/abs/1511.05879 for a reference.
"""
def call(self, x, axis=None):
"""Invokes the MAC pooling instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return mac(x, axis=axis)
class SPoC(tf.keras.layers.Layer):
"""Average pooling (SPoC) layer.
Sum-pooled convolutional features (SPoC) is based on the sum pooling of the
deep features. See https://arxiv.org/pdf/1510.07493.pdf for a reference.
"""
def call(self, x, axis=None):
"""Invokes the SPoC instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return spoc(x, axis)
class GeM(tf.keras.layers.Layer):
"""Generalized mean pooling (GeM) layer.
Generalized Mean Pooling (GeM) computes the generalized mean of each
channel in a tensor. See https://arxiv.org/abs/1711.02512 for a reference.
"""
def __init__(self, power=3.):
"""Initialization of the generalized mean pooling (GeM) layer.
Args:
power: Float power > 0 is an inverse exponent parameter, used during the
generalized mean pooling computation. Setting this exponent as power > 1
increases the contrast of the pooled feature map and focuses on the
salient features of the image. GeM is a generalization of the average
pooling commonly used in classification networks (power = 1) and of the
spatial max-pooling layer (power = inf).
"""
super(GeM, self).__init__()
self.power = power
self.eps = 1e-6
def call(self, x, axis=None):
"""Invokes the GeM instance.
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return gem(x, power=self.power, eps=self.eps, axis=axis)
def mac(x, axis=None):
"""Performs global max pooling (MAC).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return tf.reduce_max(x, axis=axis, keepdims=False)
def spoc(x, axis=None):
"""Performs average pooling (SPoC).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
return tf.reduce_mean(x, axis=axis, keepdims=False)
def gem(x, axis=None, power=3., eps=1e-6):
"""Performs generalized mean pooling (GeM).
Args:
x: [B, H, W, D] A float32 Tensor.
axis: Dimensions to reduce. By default, dimensions [1, 2] are reduced.
power: Float, power > 0 is an inverse exponent parameter (GeM power).
eps: Float, parameter for numerical stability.
Returns:
output: [B, D] A float32 Tensor.
"""
if axis is None:
axis = [1, 2]
tmp = tf.pow(tf.maximum(x, eps), power)
out = tf.pow(tf.reduce_mean(tmp, axis=axis, keepdims=False), 1. / power)
return out
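# A minimal usage sketch (the `_demo_pooling_layers` helper is an
# illustrative addition, not part of the original module). Each layer
# reduces the spatial dimensions of a [B, H, W, D] feature map, returning a
# [B, D] global descriptor.
def _demo_pooling_layers():
  x = tf.random.uniform([2, 7, 7, 64])
  return MAC()(x), SPoC()(x), GeM(power=3.)(x)  # each of shape [2, 64]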