Commit c320b6ef authored by zhenyi's avatar zhenyi
Browse files

tf2 detection

parent 0fc002df
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def nearest_upsampling(data, scale):
    """Nearest neighbor upsampling implementation.

    Args:
        data: A tensor with a shape of [batch, height_in, width_in, channels].
        scale: An integer multiple to scale resolution of input data.

    Returns:
        data_up: A tensor with a shape of
            [batch, height_in*scale, width_in*scale, channels]. Same dtype as
            input data.
    """
    with tf.name_scope('nearest_upsampling'):
        bs, h, w, c = tf.unstack(tf.shape(data))
        # Repeat each pixel `scale` times along both spatial dimensions via
        # stacking. An equivalent broadcasting formulation would be
        #   tf.reshape(data, [bs, h, 1, w, 1, c]) *
        #   tf.ones([1, 1, scale, 1, scale, 1], dtype=data.dtype)
        # but stacking avoids a 6-d intermediate, for TfLite compatibility.
        output = tf.stack([data] * scale, axis=3)
        output = tf.stack([output] * scale, axis=2)
        # NOTE: a second, unreachable `return tf.reshape(data, ...)` that
        # followed this return has been removed as dead code.
        return tf.reshape(output, [bs, h * scale, w * scale, c])
def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              is_gpu_inference=False):
    """Crop and resize boxes on a set of feature maps.

    Given multiple features maps indexed by different levels, and a set of boxes
    where each box is mapped to a certain level, it selectively crops and
    resizes boxes from the corresponding feature maps to generate the box
    features.

    We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
    figure 3 for reference). Specifically, for each feature map, we select an
    (output_size, output_size) set of pixels corresponding to the box location,
    and then use bilinear interpolation to select the feature value for each
    pixel.

    For performance, we perform the gather and interpolation on all layers as a
    single operation. To do so, the multi-level features are first stacked and
    gathered into [2*output_size, 2*output_size] feature points. Then bilinear
    interpolation is performed on the gathered feature points to generate the
    [output_size, output_size] RoIAlign feature map.

    Here is the step-by-step algorithm:
    1. The multi-level features are gathered into a
       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
       Tensor. The Tensor contains four neighboring feature points for each
       vertex in the output grid.
    2. Compute the interpolation kernel of shape
       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axes
       can be seen as stacking 2x2 interpolation kernels for all vertices in
       the output grid.
    3. Element-wise multiply the gathered features and interpolation kernel.
       Then apply 2x2 average pooling to reduce the spatial dimension to
       output_size.

    Args:
        features: a 5-D tensor of shape
            [batch_size, num_levels, max_height, max_width, num_filters] where
            cropping and resizing are based.
        boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
            information of each box w.r.t. the corresponding feature map.
            boxes[:, :, 0:2] are the grid position in (y, x) (float) of the
            top-left corner of each box. boxes[:, :, 2:4] are the box sizes in
            (h, w) (float) in terms of the number of pixels of the
            corresponding feature map size.
        box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1]
            representing the 0-based corresponding feature level index of each
            box.
        boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2]
            representing the boundary (in (y, x)) of the corresponding feature
            map for each box. Any resampled grid points that go beyond the
            boundary will be clipped.
        output_size: a scalar indicating the output crop size.
        is_gpu_inference: whether to build the model for GPU inference.

    Returns:
        features_per_box: a 5-D tensor of shape
            [batch_size, num_boxes, output_size, output_size, num_filters]
            representing the cropped features.
    """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Compute the grid position w.r.t. the corresponding feature map.
    box_grid_x = []
    box_grid_y = []
    for i in range(output_size):
        box_grid_x.append(boxes[:, :, 1:2] +
                          (i + 0.5) * boxes[:, :, 3:4] / output_size)
        box_grid_y.append(boxes[:, :, 0:1] +
                          (i + 0.5) * boxes[:, :, 2:3] / output_size)
    box_grid_x = tf.concat(box_grid_x, axis=-1)
    box_grid_y = tf.concat(box_grid_y, axis=-1)

    # Compute indices for the gather operation: the two nearest integer grid
    # positions per sample point, clipped to the per-box boundary.
    box_grid_y0 = tf.floor(box_grid_y)
    box_grid_x0 = tf.floor(box_grid_x)
    box_grid_x0 = tf.maximum(0., box_grid_x0)
    box_grid_y0 = tf.maximum(0., box_grid_y0)
    box_gridx0x1 = tf.stack([
        tf.minimum(box_grid_x0, boundaries[:, :, 1:2]),
        tf.minimum(box_grid_x0 + 1, boundaries[:, :, 1:2])
    ], axis=3)
    box_gridy0y1 = tf.stack([
        tf.minimum(box_grid_y0, boundaries[:, :, 0:1]),
        tf.minimum(box_grid_y0 + 1, boundaries[:, :, 0:1])
    ], axis=3)
    x_indices = tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2])
    y_indices = tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2])

    # If using GPU for inference, delay the cast until when Gather ops show up
    # since GPU inference supports floating point better.
    # TODO(laigd): revisit this when newer versions of GPU libraries is released.
    indices_dtype = tf.float32 if is_gpu_inference else tf.int32
    if not is_gpu_inference:
        x_indices = tf.cast(x_indices, tf.int32)
        y_indices = tf.cast(y_indices, tf.int32)

    # Linearize (batch, level, y, x) coordinates into a single index over the
    # flattened feature tensor.
    height_dim_offset = max_feature_width
    level_dim_offset = max_feature_height * height_dim_offset
    batch_dim_offset = num_levels * level_dim_offset
    batch_dim_indices = (
        tf.reshape(tf.range(batch_size, dtype=indices_dtype) * batch_dim_offset, [batch_size, 1, 1, 1]) *
        tf.ones([1, num_boxes, output_size * 2, output_size * 2], dtype=indices_dtype)
    )
    box_level_indices = (
        tf.reshape(box_levels * level_dim_offset, [batch_size, num_boxes, 1, 1]) *
        tf.ones([1, 1, output_size * 2, output_size * 2], dtype=indices_dtype)
    )
    height_indices = (
        tf.reshape(y_indices * height_dim_offset, [batch_size, num_boxes, output_size * 2, 1]) *
        tf.ones([1, 1, 1, output_size * 2], dtype=indices_dtype)
    )
    width_indices = (
        tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]) *
        tf.ones([1, 1, output_size * 2, 1], dtype=indices_dtype)
    )

    # Sum the four index components in float32, then cast back to int32 for
    # the gather. The original code guarded this with `if True:` and carried
    # an unreachable `else` branch (a direct int32 add_n, kept for a future
    # where int32 add_n is supported on GPUs); the dead branch is removed.
    # TODO: add the components directly once int32 add_n is supported on GPUs.
    batch_dim_indices = tf.cast(batch_dim_indices, tf.float32)
    box_level_indices = tf.cast(box_level_indices, tf.float32)
    height_indices = tf.cast(height_indices, tf.float32)
    width_indices = tf.cast(width_indices, tf.float32)
    indices = tf.add_n([
        batch_dim_indices,
        box_level_indices,
        height_indices,
        width_indices,
    ])
    indices = tf.cast(indices, tf.int32)

    if batch_size == 1:
        # Special handling for single batch input to make it friendly for GPU
        # inference.
        indices = tf.reshape(indices, [1, -1])
        if is_gpu_inference:
            # Defensive cast; `indices` is already int32 at this point.
            indices = tf.cast(indices, dtype=tf.int32)
        features = tf.reshape(features, [1, -1, num_filters])
        # Cast should happen at last since GPU has better support for floating
        # point operations.
        features_per_box = tf.gather(features, indices, axis=1)
    else:
        indices = tf.reshape(indices, [-1])
        if is_gpu_inference:
            # Defensive cast; `indices` is already int32 at this point.
            indices = tf.cast(indices, dtype=tf.int32)
        features = tf.reshape(features, [-1, num_filters])
        features_per_box = tf.gather(features, indices)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters]
    )

    # The RoIAlign feature f can be computed by bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3.
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
    # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
    ly = box_grid_y - box_grid_y0
    lx = box_grid_x - box_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx
    kernel_x = tf.reshape(tf.stack([hx, lx], axis=3), [batch_size, num_boxes, 1, output_size * 2])
    kernel_y = tf.reshape(tf.stack([hy, ly], axis=3), [batch_size, num_boxes, output_size * 2, 1])
    # Use implicit broadcast to generate the interpolation kernel. The
    # multiplier `4` is for avg pooling.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    features_per_box *= tf.cast(tf.expand_dims(interpolation_kernel, axis=4), dtype=features_per_box.dtype)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, output_size * 2, output_size * 2, num_filters]
    )
    features_per_box = tf.nn.avg_pool2d(features_per_box, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    features_per_box = tf.reshape(features_per_box, [batch_size, num_boxes, output_size, output_size, num_filters])

    return features_per_box
def multilevel_crop_and_resize(features,
                               boxes,
                               output_size=7,
                               is_gpu_inference=False):
    """Crop and resize on multilevel feature pyramid.

    Generate the (output_size, output_size) set of pixels for each input box
    by first locating the box into the correct feature level, and then cropping
    and resizing it using the corresponding feature map of that level.

    Args:
        features: A dictionary with key as pyramid level and value as features.
            The features are in shape of [batch_size, height_l, width_l,
            num_filters].
        boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        output_size: A scalar to indicate the output crop size.
        is_gpu_inference: whether to build the model for GPU inference.

    Returns:
        A 5-D tensor representing feature crop of shape
        [batch_size, num_boxes, output_size, output_size, num_filters].
    """
    with tf.name_scope('multilevel_crop_and_resize'):
        levels = features.keys()
        min_level = min(levels)
        max_level = max(levels)
        # All levels are zero-padded to the spatial size of the min level (the
        # largest map) so they can be stacked into one tensor.
        _, max_feature_height, max_feature_width, _ = (
            features[min_level].get_shape().as_list())
        # Stack feature pyramid into a features_all of shape
        # [batch_size, levels, height, width, num_filters].
        features_all = []
        for level in range(min_level, max_level + 1):
            features_all.append(tf.image.pad_to_bounding_box(features[level], 0, 0, max_feature_height, max_feature_width))
        features_all = tf.stack(features_all, axis=1)
        # Assign boxes to the right level:
        # level = 4 + floor(log2(sqrt(area) / 224)).
        box_width = tf.squeeze(boxes[:, :, 3:4] - boxes[:, :, 1:2], axis=-1)
        box_height = tf.squeeze(boxes[:, :, 2:3] - boxes[:, :, 0:1], axis=-1)
        areas_sqrt = tf.sqrt(box_height * box_width)
        levels = tf.math.floordiv(tf.math.log(tf.divide(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0
        # On GPU inference `levels` stays float32 (integer ops avoided
        # throughout this path); otherwise cast to int32 for indexing.
        if not is_gpu_inference:
            levels = tf.cast(levels, dtype=tf.int32)
        # Map levels between [min_level, max_level].
        levels = tf.minimum(
            float(max_level) if is_gpu_inference else max_level,
            tf.maximum(levels, float(min_level) if is_gpu_inference else min_level)
        )
        # Project box location and sizes to corresponding feature levels:
        # divide by the assigned level's stride 2^level.
        scale_to_level = tf.cast(
            tf.pow(tf.constant(2.0), levels if is_gpu_inference else tf.cast(levels, tf.float32)),
            dtype=boxes.dtype
        )
        boxes /= tf.expand_dims(scale_to_level, axis=2)
        box_width /= scale_to_level
        box_height /= scale_to_level
        # Repack boxes as [y1, x1, h, w] in level coordinates — the layout
        # selective_crop_and_resize expects.
        boxes = tf.concat(
            [boxes[:, :, 0:2],
             tf.expand_dims(box_height, -1),
             tf.expand_dims(box_width, -1)],
            axis=-1
        )
        # Map levels to [0, max_level-min_level] to index the stacked tensor.
        levels -= min_level
        # Per-box clipping boundary: the valid (un-padded) extent of the
        # assigned level inside the stacked feature tensor.
        level_strides = tf.pow([[2.0]], levels if is_gpu_inference else tf.cast(levels, tf.float32))
        boundary = tf.cast(
            tf.concat(
                [
                    tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1, axis=-1),
                    tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1, axis=-1),
                ],
                axis=-1
            ),
            boxes.dtype
        )
        return selective_crop_and_resize(features_all, boxes, levels, boundary, output_size, is_gpu_inference)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Training specific ops, including sampling, building targets, etc."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from mask_rcnn.utils import box_utils
from mask_rcnn.ops import spatial_transform_ops
from mask_rcnn.object_detection import balanced_positive_negative_sampler
_EPSILON = 1e-8
def _add_class_assignments(iou, gt_boxes, gt_labels):
    """Computes object category assignment for each box.

    Args:
        iou: a tensor for the iou matrix with a shape of
            [batch_size, K, MAX_NUM_INSTANCES]. K is the number of post-nms
            RoIs (i.e., rpn_post_nms_topn).
        gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
            This tensor might have paddings with negative values. The
            coordinates of gt_boxes are in the pixel coordinates of the scaled
            image scale.
        gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
            This tensor might have paddings with a value of -1.

    Returns:
        max_boxes: a tensor with a shape of [batch_size, K, 4], representing
            the ground truth coordinates of each roi.
        max_classes: a int32 tensor with a shape of [batch_size, K],
            representing the ground truth class of each roi.
        max_overlap: a tensor with a shape of [batch_size, K], representing
            the maximum overlap of each roi.
        argmax_iou: a tensor with a shape of [batch_size, K], representing the
            iou argmax.
    """
    with tf.name_scope('add_class_assignments'):
        batch_size, _, _ = iou.get_shape().as_list()
        # For each roi, the gt instance with the highest IoU.
        argmax_iou = tf.argmax(input=iou, axis=2, output_type=tf.int32)
        # Offset per-image argmax indices by row so they address the
        # flattened [batch_size * MAX_NUM_INSTANCES] gt tensors.
        indices = tf.reshape(
            argmax_iou + tf.expand_dims(tf.range(batch_size) * tf.shape(input=gt_labels)[1], 1),
            shape=[-1]
        )
        max_classes = tf.reshape(tf.gather(tf.reshape(gt_labels, [-1, 1]), indices), [batch_size, -1])
        max_overlap = tf.reduce_max(input_tensor=iou, axis=2)
        # Rois with zero overlap against every gt box count as background
        # (class 0, zero box target).
        bg_mask = tf.equal(max_overlap, tf.zeros_like(max_overlap))
        max_classes = tf.where(bg_mask, tf.zeros_like(max_classes), max_classes)
        max_boxes = tf.reshape(
            tf.gather(tf.reshape(gt_boxes, [-1, 4]), indices),
            [batch_size, -1, 4]
        )
        max_boxes = tf.where(
            tf.tile(tf.expand_dims(bg_mask, axis=2), [1, 1, 4]),
            tf.zeros_like(max_boxes),
            max_boxes
        )
        return max_boxes, max_classes, max_overlap, argmax_iou
def encode_box_targets(boxes, gt_boxes, gt_labels, bbox_reg_weights):
    """Encodes predicted boxes with respect to ground truth boxes."""
    with tf.name_scope('encode_box_targets'):
        encoded = box_utils.encode_boxes(
            boxes=gt_boxes, anchors=boxes, weights=bbox_reg_weights)
        # Background rois (label == 0) must not contribute regression
        # targets, so their encoded boxes are zeroed out.
        is_background = tf.equal(gt_labels, tf.zeros_like(gt_labels))
        bg_mask = tf.tile(tf.expand_dims(is_background, axis=2), [1, 1, 4])
        return tf.where(bg_mask, tf.zeros_like(encoded), encoded)
def proposal_label_op(boxes, gt_boxes, gt_labels,
                      batch_size_per_im=512, fg_fraction=0.25, fg_thresh=0.5,
                      bg_thresh_hi=0.5, bg_thresh_lo=0.):
    """Assigns the proposals with ground truth labels and performs subsampling.

    Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
    following algorithm to generate the final `batch_size_per_im` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposal box with a ground truth class and box label by
       choosing the largest overlap.
    3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
       box_targets, class_targets, and RoIs.
    The reference implementations of #1 and #2 are here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py
    The reference implementation of #3 is here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py

    Args:
        boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
            proposals before groundtruth assignment (e.g., rpn_post_nms_topn).
            The last dimension is the pixel coordinates of scaled images in
            [ymin, xmin, ymax, xmax] form.
        gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
            This tensor might have paddings with a value of -1. The coordinates
            of gt_boxes are in the pixel coordinates of the scaled image.
        gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
            This tensor might have paddings with a value of -1.
        batch_size_per_im: a integer represents RoI minibatch size per image.
        fg_fraction: a float represents the target fraction of RoI minibatch
            that is labeled foreground (i.e., class > 0).
        fg_thresh: a float represents the overlap threshold for an RoI to be
            considered foreground (if >= fg_thresh).
        bg_thresh_hi: a float represents the overlap threshold for an RoI to be
            considered background (class = 0 if overlap in [LO, HI)).
        bg_thresh_lo: a float represents the overlap threshold for an RoI to be
            considered background (class = 0 if overlap in [LO, HI)).

    Returns:
        box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
            contains the ground truth pixel coordinates of the scaled images
            for each roi. K is the number of sample RoIs (e.g.,
            batch_size_per_im).
        class_targets: a integer tensor with a shape of [batch_size, K]. The
            tensor contains the ground truth class for each roi.
        rois: a tensor with a shape of [batch_size, K, 4], representing the
            coordinates of the selected RoI.
        proposal_to_label_map: a tensor with a shape of [batch_size, K]. This
            tensor keeps the mapping between proposal to labels.
            proposal_to_label_map[i] means the index of the ground truth
            instance for the i-th proposal.
    """
    with tf.name_scope('proposal_label'):
        batch_size = boxes.shape[0]

        # The reference implementation intentionally includes ground truth boxes in
        # the proposals.
        # see https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359
        boxes = tf.concat([boxes, gt_boxes], axis=1)
        iou = box_utils.bbox_overlap(boxes, gt_boxes)

        (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
         proposal_to_label_map) = _add_class_assignments(iou, gt_boxes, gt_labels)

        # Generates a random sample of RoIs comprising foreground and background
        # examples.
        # reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132
        positives = tf.greater(max_overlap,
                               fg_thresh * tf.ones_like(max_overlap))
        negatives = tf.logical_and(
            tf.greater_equal(max_overlap, bg_thresh_lo * tf.ones_like(max_overlap)),
            tf.less(max_overlap, bg_thresh_hi * tf.ones_like(max_overlap))
        )
        # Background proposals carry class 0 and map to gt instance 0.
        pre_sample_class_targets = tf.where(
            negatives,
            tf.zeros_like(pre_sample_class_targets),
            pre_sample_class_targets
        )
        proposal_to_label_map = tf.where(
            negatives,
            tf.zeros_like(proposal_to_label_map),
            proposal_to_label_map
        )

        # Handles ground truth paddings.
        ignore_mask = tf.less(tf.reduce_min(input_tensor=iou, axis=2), tf.zeros_like(max_overlap))

        # indicator includes both positive and negative labels.
        # labels includes only positives labels.
        # positives = indicator & labels.
        # negatives = indicator & !labels.
        # ignore = !indicator.
        labels = positives
        pos_or_neg = tf.logical_or(positives, negatives)
        indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))

        all_samples = []
        sampler = balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=fg_fraction,
            is_static=True
        )
        # Batch-unroll the sub-sampling process.
        for i in range(batch_size):
            samples = sampler.subsample(indicator[i], batch_size_per_im, labels[i])
            all_samples.append(samples)
        # Stack the per-image boolean sample masks into [batch_size, ...].
        # (The original `tf.stack([all_samples], axis=0)[0]` added and then
        # immediately stripped a spurious leading dimension.)
        all_samples = tf.stack(all_samples, axis=0)

        # A workaround to get the indices from the boolean tensors.
        _, samples_indices = tf.nn.top_k(tf.cast(all_samples, dtype=tf.int32), k=batch_size_per_im, sorted=True)
        # Constructs indices for gather over the flattened batch.
        samples_indices = tf.reshape(
            samples_indices + tf.expand_dims(tf.range(batch_size) * tf.shape(input=boxes)[1], 1),
            [-1]
        )
        rois = tf.reshape(
            tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
            [batch_size, -1, 4]
        )
        class_targets = tf.reshape(
            tf.gather(tf.reshape(pre_sample_class_targets, [-1, 1]), samples_indices),
            [batch_size, -1]
        )
        sample_box_targets = tf.reshape(
            tf.gather(tf.reshape(pre_sample_box_targets, [-1, 4]), samples_indices),
            [batch_size, -1, 4]
        )
        sample_proposal_to_label_map = tf.reshape(
            tf.gather(tf.reshape(proposal_to_label_map, [-1, 1]), samples_indices),
            [batch_size, -1]
        )
        return sample_box_targets, class_targets, rois, sample_proposal_to_label_map
def select_fg_for_masks(class_targets, box_targets, boxes, proposal_to_label_map, max_num_fg=128):
    """Selects the foreground objects for mask branch during training.

    Args:
        class_targets: a tensor of shape [batch_size, num_boxes] representing
            the class label for each box.
        box_targets: a tensor with a shape of [batch_size, num_boxes, 4]. The
            tensor contains the ground truth pixel coordinates of the scaled
            images for each roi.
        boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        proposal_to_label_map: a tensor with a shape of
            [batch_size, num_boxes]. This tensor keeps the mapping between
            proposal to labels. proposal_to_label_map[i] means the index of the
            ground truth instance for the i-th proposal.
        max_num_fg: a integer represents the number of masks per image.

    Returns:
        class_targets, boxes, proposal_to_label_map, box_targets that have
        foreground objects.
    """
    # Masks are for positive (fg) objects only.
    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py
    batch_size = boxes.shape[0]

    # top_k over a 0/1 foreground indicator yields the positions of up to
    # `max_num_fg` foreground boxes per image.
    fg_indicator = tf.cast(tf.greater(class_targets, 0), dtype=tf.float32)
    _, fg_positions = tf.nn.top_k(fg_indicator, k=max_num_fg)

    # Offset per-image positions so they index the flattened batch.
    row_offsets = tf.expand_dims(tf.range(batch_size) * tf.shape(input=class_targets)[1], 1)
    flat_indices = tf.reshape(fg_positions + row_offsets, [-1])

    def _take(tensor, width):
        # Gather the selected rows and restore the [batch, k(, width)] layout.
        gathered = tf.gather(tf.reshape(tensor, [-1, width]), flat_indices)
        shape = [batch_size, -1] if width == 1 else [batch_size, -1, width]
        return tf.reshape(gathered, shape)

    return (_take(class_targets, 1),
            _take(box_targets, 4),
            _take(boxes, 4),
            _take(proposal_to_label_map, 1))
def get_mask_targets(fg_boxes, fg_proposal_to_label_map, fg_box_targets, mask_gt_labels, output_size=28):
    """Crop and resize on multilevel feature pyramid.

    Args:
        fg_boxes: A 3-D tensor of shape [batch_size, num_masks, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        fg_proposal_to_label_map: A tensor of shape [batch_size, num_masks].
        fg_box_targets: a float tensor representing the box label for each box
            with a shape of [batch_size, num_masks, 4].
        mask_gt_labels: A tensor with a shape of [batch_size, M, H+4, W+4]. M
            is NUM_MAX_INSTANCES (i.e., 100 in this implementation) in each
            image, while H and W are ground truth mask size. The `+4` comes
            from padding of two zeros in both directions of height and width
            dimension.
        output_size: A scalar to indicate the output crop size.

    Returns:
        A 4-D tensor representing feature crop of shape
        [batch_size, num_boxes, output_size, output_size].
    """
    _, _, max_feature_height, max_feature_width = mask_gt_labels.get_shape().as_list()
    # proposal_to_label_map might have a -1 paddings.
    levels = tf.maximum(fg_proposal_to_label_map, 0)
    # Projects box location and sizes to corresponding cropped ground truth
    # mask coordinates.
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=fg_boxes, num_or_size_splits=4, axis=2)
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=fg_box_targets, num_or_size_splits=4, axis=2)
    # The real mask extent excludes the 4 padded pixels (2 per side) of each
    # spatial dimension.
    valid_feature_width = max_feature_width - 4
    valid_feature_height = max_feature_height - 4
    # Express the proposal box relative to its gt box in mask pixel space.
    # The `+ 2` shifts past the 2-pixel zero padding; _EPSILON guards against
    # division by zero for degenerate (zero-area) gt boxes.
    y_transform = (bb_y_min - gt_y_min) * valid_feature_height / (gt_y_max - gt_y_min + _EPSILON) + 2
    x_transform = (bb_x_min - gt_x_min) * valid_feature_width / (gt_x_max - gt_x_min + _EPSILON) + 2
    h_transform = (bb_y_max - bb_y_min) * valid_feature_height / (gt_y_max - gt_y_min + _EPSILON)
    w_transform = (bb_x_max - bb_x_min) * valid_feature_width / (gt_x_max - gt_x_min + _EPSILON)
    # Clip resampled points to the full (padded) mask boundary.
    boundaries = tf.concat(
        [
            tf.cast(tf.ones_like(y_transform) * (max_feature_height - 1), dtype=tf.float32),
            tf.cast(tf.ones_like(x_transform) * (max_feature_width - 1), dtype=tf.float32)
        ],
        axis=-1
    )
    # Reuse the RoIAlign implementation, treating each gt mask as a
    # single-channel "feature level" indexed by the proposal's gt instance.
    features_per_box = spatial_transform_ops.selective_crop_and_resize(
        tf.expand_dims(mask_gt_labels, -1),
        tf.concat([y_transform, x_transform, h_transform, w_transform], -1),
        tf.expand_dims(levels, -1),
        boundaries,
        output_size
    )
    features_per_box = tf.squeeze(features_per_box, axis=-1)
    # Masks are binary outputs: threshold the interpolated values at 0.5.
    features_per_box = tf.where(
        tf.greater_equal(features_per_box, 0.5),
        tf.ones_like(features_per_box),
        tf.zeros_like(features_per_box)
    )
    # mask_targets depend on box RoIs, which have gradients. This stop_gradient
    # prevents the flow of gradient to box RoIs.
    features_per_box = tf.stop_gradient(features_per_box)
    return features_per_box
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Learning rate schedule."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def step_learning_rate_with_linear_warmup(
        global_step,
        init_learning_rate,
        warmup_learning_rate,
        warmup_steps,
        learning_rate_levels,
        learning_rate_steps
):
    """Creates the step learning rate tensor with linear warmup."""
    # Linear ramp from warmup_learning_rate to init_learning_rate over the
    # first `warmup_steps` steps.
    warmup_progress = tf.cast(global_step, dtype=tf.float32) / warmup_steps
    warmup_lr = warmup_learning_rate + warmup_progress * (init_learning_rate - warmup_learning_rate)

    # Piecewise-constant schedule that takes over once warmup is done.
    stepwise_lr = tf.compat.v1.train.piecewise_constant(
        global_step,
        boundaries=learning_rate_steps,
        values=[init_learning_rate] + learning_rate_levels
    )

    # Both branches are built unconditionally (as in the original inner-fn
    # formulation); tf.where selects per-step.
    return tf.where(global_step < warmup_steps, warmup_lr, stepwise_lr)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses used for Mask-RCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from distutils.version import LooseVersion
import tensorflow as tf
# When True, each loss also computes the legacy tf.compat.v1 reference value
# and prints both for comparison (see the loss functions below).
DEBUG_LOSS_IMPLEMENTATION = False
# Keras moved the loss Reduction enum across TF versions; resolve a single
# `ReductionV2` alias that works on both sides of the 2.0 boundary.
if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
    from tensorflow.python.keras.utils import losses_utils
    ReductionV2 = losses_utils.ReductionV2
else:
    ReductionV2 = tf.keras.losses.Reduction
def _huber_loss(y_true, y_pred, weights, delta):
    """Weighted Huber loss, summed over all elements and normalized by the
    number of non-zero weights.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, broadcast-compatible with `y_true`.
        weights: float32 per-element weights; zero entries are masked out of
            both the sum and the normalizer.
        delta: Huber transition point between quadratic and linear regimes.

    Returns:
        A float32 scalar loss tensor.
    """
    # Normalizer: how many elements actually contribute to the loss.
    num_non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)
    huber_keras_loss = tf.keras.losses.Huber(
        delta=delta,
        reduction=ReductionV2.SUM,
        name='huber_loss'
    )
    # NOTE(review): the trailing axis added for TF >= 2.2 presumably keeps
    # `sample_weight` applied per element after Keras loss broadcasting
    # changes in that release — confirm against the targeted TF versions.
    if LooseVersion(tf.__version__) >= LooseVersion("2.2.0"):
        y_true = tf.expand_dims(y_true, axis=-1)
        y_pred = tf.expand_dims(y_pred, axis=-1)
    huber_loss = huber_keras_loss(
        y_true,
        y_pred,
        sample_weight=weights
    )
    assert huber_loss.dtype == tf.float32
    # divide_no_nan yields 0 (not NaN) when every weight is zero.
    huber_loss = tf.math.divide_no_nan(huber_loss, num_non_zeros, name="huber_loss")
    assert huber_loss.dtype == tf.float32
    if DEBUG_LOSS_IMPLEMENTATION:
        # Debug cross-check against the legacy tf.compat.v1 huber loss.
        mlperf_loss = tf.compat.v1.losses.huber_loss(
            y_true,
            y_pred,
            weights=weights,
            delta=delta,
            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
        )
        print_op = tf.print("Huber Loss - MLPerf:", mlperf_loss, " && Legacy Loss:", huber_loss)
        with tf.control_dependencies([print_op]):
            huber_loss = tf.identity(huber_loss)
    return huber_loss
def _sigmoid_cross_entropy(multi_class_labels, logits, weights, sum_by_non_zeros_weights=False):
    """Weighted sigmoid cross-entropy summed over all elements, optionally
    normalized by the count of non-zero weights."""
    assert weights.dtype == tf.float32

    per_element = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=multi_class_labels,
        logits=logits,
        name="x-entropy"
    )
    assert per_element.dtype == tf.float32

    # Mask via the weights, then collapse to a scalar sum.
    weighted = tf.math.multiply(per_element, weights)
    loss = tf.math.reduce_sum(weighted)
    assert loss.dtype == tf.float32

    if sum_by_non_zeros_weights:
        # divide_no_nan yields 0 instead of NaN when all weights are zero.
        non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)
        loss = tf.math.divide_no_nan(
            loss,
            non_zeros,
            name="sum_by_non_zeros_weights"
        )
        assert loss.dtype == tf.float32

    if DEBUG_LOSS_IMPLEMENTATION:
        # Debug cross-check against the legacy tf.compat.v1 implementation.
        reduction = (
            tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
            if sum_by_non_zeros_weights
            else tf.compat.v1.losses.Reduction.SUM
        )
        mlperf_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
            multi_class_labels=multi_class_labels,
            logits=logits,
            weights=weights,
            reduction=reduction
        )
        print_op = tf.print(
            "Sigmoid X-Entropy Loss (%s) - MLPerf:" % reduction, mlperf_loss, " && Legacy Loss:", loss
        )
        with tf.control_dependencies([print_op]):
            loss = tf.identity(loss)

    return loss
def _softmax_cross_entropy(onehot_labels, logits):
    """Softmax cross-entropy summed over examples and normalized by the count
    of non-zero one-hot labels."""
    non_zeros = tf.math.count_nonzero(onehot_labels, dtype=tf.float32)

    per_example = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels,
        logits=logits
    )
    assert per_example.dtype == tf.float32

    # divide_no_nan yields 0 instead of NaN when the labels are all zero.
    loss = tf.math.reduce_sum(per_example)
    loss = tf.math.divide_no_nan(loss, non_zeros, name="softmax_cross_entropy")
    assert loss.dtype == tf.float32

    if DEBUG_LOSS_IMPLEMENTATION:
        # Debug cross-check against the legacy tf.compat.v1 implementation.
        mlperf_loss = tf.compat.v1.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels,
            logits=logits,
            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
        )
        print_op = tf.print("Softmax X-Entropy Loss - MLPerf:", mlperf_loss, " && Legacy Loss:", loss)
        with tf.control_dependencies([print_op]):
            loss = tf.identity(loss)

    return loss
def _rpn_score_loss(score_outputs, score_targets, normalizer=1.0):
    """Computes the RPN objectness (score) loss.

    `score_targets` encodes three anchor states:
      *  1: the anchor is a positive sample,
      *  0: negative,
      * -1: "don't care" (ignored).

    Ignored anchors are zero-weighted in the cross-entropy; the result is
    then optionally divided by `normalizer`.
    """
    with tf.name_scope('rpn_score_loss'):
        # Anchors with target >= 0 participate; the -1 targets do not.
        valid_mask = tf.cast(tf.math.greater_equal(score_targets, 0), dtype=tf.float32)
        # Clamp the -1 "ignore" labels up to 0 so they are valid binary
        # targets (they carry zero weight via `valid_mask` anyway).
        binary_targets = tf.cast(
            tf.maximum(score_targets, tf.zeros_like(score_targets)),
            dtype=tf.float32
        )
        assert score_outputs.dtype == tf.float32
        assert binary_targets.dtype == tf.float32
        score_loss = _sigmoid_cross_entropy(
            multi_class_labels=binary_targets,
            logits=score_outputs,
            weights=valid_mask,
            sum_by_non_zeros_weights=False
        )
        assert score_loss.dtype == tf.float32
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            score_loss /= normalizer
        assert score_loss.dtype == tf.float32
        return score_loss
def _rpn_box_loss(box_outputs, box_targets, normalizer=1.0, delta=1. / 9):
    """Computes the RPN box-regression (Huber) loss.

    `delta` is typically around the mean value of the regression targets;
    for instance, the targets of a 512x512 input with 6 anchors on the
    P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
    """
    with tf.name_scope('rpn_box_loss'):
        # Only coordinates with a non-zero target contribute to the loss.
        target_mask = tf.cast(tf.not_equal(box_targets, 0.0), tf.float32)
        assert target_mask.dtype == tf.float32
        # _huber_loss normalizes by the sum of non-zero weights before the
        # caller-supplied `normalizer` is applied.
        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs, weights=target_mask, delta=delta)
        assert box_loss.dtype == tf.float32
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            box_loss /= normalizer
        assert box_loss.dtype == tf.float32
        return box_loss
def rpn_loss(score_outputs, box_outputs, labels, params):
    """Computes total RPN detection loss.

    Computes total RPN detection loss including box and score from all levels.

    Args:
        score_outputs: an OrderedDict with keys representing levels and values
            representing scores in [batch_size, height, width, num_anchors].
        box_outputs: an OrderedDict with keys representing levels and values
            representing box regression targets in
            [batch_size, height, width, num_anchors * 4].
        labels: the dictionary returned from the dataloader that includes
            groundtruth targets ('score_targets_<level>' and
            'box_targets_<level>' entries).
        params: the dictionary including training parameters specified in
            the default_hparams function in this file.

    Returns:
        total_rpn_loss: a float tensor representing total loss reduced from
            score and box losses from all levels.
        rpn_score_loss: a float tensor representing total score loss.
        rpn_box_loss: a float tensor representing total box regression loss.
    """
    with tf.name_scope('rpn_loss'):
        # The score normalizer does not depend on the pyramid level: build the
        # constant once instead of re-creating it on every loop iteration.
        score_normalizer = tf.cast(
            params['train_batch_size'] * params['rpn_batch_size_per_im'],
            dtype=tf.float32
        )
        score_losses = []
        box_losses = []
        for level in range(int(params['min_level']), int(params['max_level'] + 1)):
            score_losses.append(
                _rpn_score_loss(
                    score_outputs=score_outputs[level],
                    score_targets=labels['score_targets_%d' % level],
                    normalizer=score_normalizer
                )
            )
            box_losses.append(
                _rpn_box_loss(
                    box_outputs=box_outputs[level],
                    box_targets=labels['box_targets_%d' % level],
                    normalizer=1.0
                )
            )
        # Sum per-level losses to total loss.
        rpn_score_loss = tf.add_n(score_losses)
        rpn_box_loss = params['rpn_box_loss_weight'] * tf.add_n(box_losses)
        total_rpn_loss = rpn_score_loss + rpn_box_loss
        return total_rpn_loss, rpn_score_loss, rpn_box_loss
def _fast_rcnn_class_loss(class_outputs, class_targets_one_hot, normalizer=1.0):
    """Computes the Fast-RCNN classification (softmax cross-entropy) loss."""
    with tf.name_scope('fast_rcnn_class_loss'):
        # _softmax_cross_entropy already normalizes by the number of non-zero
        # label entries; `normalizer` is applied on top of that.
        class_loss = _softmax_cross_entropy(onehot_labels=class_targets_one_hot, logits=class_outputs)
        needs_scaling = isinstance(normalizer, tf.Tensor) or normalizer != 1.0
        if needs_scaling:
            class_loss /= normalizer
        return class_loss
def _fast_rcnn_box_loss(box_outputs, box_targets, class_targets, normalizer=1.0, delta=1.):
    """Computes the Fast-RCNN box-regression (Huber) loss.

    Only RoIs with a positive (foreground) class target contribute; the
    boolean mask is tiled over the 4 box coordinates. `delta` is typically
    around the mean value of the regression targets.
    """
    with tf.name_scope('fast_rcnn_box_loss'):
        # [batch, num_rois] -> [batch, num_rois, 4]: one weight per coordinate.
        foreground = tf.greater(class_targets, 0)
        mask = tf.tile(tf.expand_dims(foreground, axis=2), [1, 1, 4])
        # _huber_loss normalizes by the sum of non-zero weights before the
        # caller-supplied `normalizer` is applied.
        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs, weights=mask, delta=delta)
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            box_loss /= normalizer
        return box_loss
def fast_rcnn_loss(class_outputs, box_outputs, class_targets, box_targets, params):
    """Computes the box and class loss (Fast-RCNN branch) of Mask-RCNN.

    This function implements the classification and box regression loss of the
    Fast-RCNN branch in Mask-RCNN. As the `box_outputs` produces `num_classes`
    boxes for each RoI, the reference model expands `box_targets` to match the
    shape of `box_outputs` and selects only the target that the RoI has a maximum
    overlap. (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py)
    Instead, this function selects the `box_outputs` by the `class_targets` so
    that it doesn't expand `box_targets`.

    The loss computation has two parts: (1) classification loss is softmax on all
    RoIs. (2) box loss is smooth L1-loss on only positive samples of RoIs.
    Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py

    Args:
        class_outputs: a float tensor representing the class prediction for each box
            with a shape of [batch_size, num_boxes, num_classes].
        box_outputs: a float tensor representing the box prediction for each box
            with a shape of [batch_size, num_boxes, num_classes * 4].
        class_targets: a float tensor representing the class label for each box
            with a shape of [batch_size, num_boxes].
        box_targets: a float tensor representing the box label for each box
            with a shape of [batch_size, num_boxes, 4].
        params: the dictionary including training parameters specified in
            the default_hparams function in this file.

    Returns:
        total_loss: a float tensor representing total loss reducing from
            class and box losses from all levels.
        cls_loss: a float tensor representing total class loss.
        box_loss: a float tensor representing total box regression loss.
    """
    with tf.name_scope('fast_rcnn_loss'):
        class_targets = tf.cast(class_targets, dtype=tf.int32)
        # Selects the box from `box_outputs` based on `class_targets`, with which
        # the box has the maximum overlap.
        batch_size, num_rois, _ = box_outputs.get_shape().as_list()
        box_outputs = tf.reshape(box_outputs, [batch_size, num_rois, params['num_classes'], 4])
        # Flat index of each RoI's target-class box within the flattened
        # [batch * rois * classes] prediction tensor:
        # class index + per-batch offset + per-RoI offset.
        box_indices = tf.reshape(
            class_targets +
            tf.tile(tf.expand_dims(tf.range(batch_size) * num_rois * params['num_classes'], 1), [1, num_rois]) +
            tf.tile(tf.expand_dims(tf.range(num_rois) * params['num_classes'], 0), [batch_size, 1]),
            [-1]
        )
        # One-hot matmul acts as a gather: row i of the one-hot matrix selects
        # the box at flat index box_indices[i].
        box_outputs = tf.matmul(
            tf.one_hot(
                box_indices,
                batch_size * num_rois * params['num_classes'],
                dtype=box_outputs.dtype
            ),
            tf.reshape(box_outputs, [-1, 4])
        )
        box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])
        box_loss = _fast_rcnn_box_loss(
            box_outputs=box_outputs,
            box_targets=box_targets,
            class_targets=class_targets,
            normalizer=1.0
        )
        box_loss *= params['fast_rcnn_box_loss_weight']
        # NOTE(review): the sparse cross-entropy path is disabled here; targets
        # are one-hot encoded before the class loss.
        use_sparse_x_entropy = False
        _class_targets = class_targets if use_sparse_x_entropy else tf.one_hot(class_targets, params['num_classes'])
        class_loss = _fast_rcnn_class_loss(
            class_outputs=class_outputs,
            class_targets_one_hot=_class_targets,
            normalizer=1.0
        )
        total_loss = class_loss + box_loss
        return total_loss, class_loss, box_loss
def mask_rcnn_loss(mask_outputs, mask_targets, select_class_targets, params):
    """Computes the mask loss of Mask-RCNN.

    As `mask_outputs` produces `num_classes` masks for each RoI, the reference
    model expands `mask_targets` to match the shape of `mask_outputs` and
    selects only the target that the RoI has a maximum overlap with.
    (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py)
    Instead, this implementation selects the `mask_outputs` by the
    `class_targets` so that it doesn't expand `mask_targets`. The selection
    logic is done in the post-processing of mask_rcnn_fn in
    mask_rcnn_architecture.py.

    Args:
        mask_outputs: a float tensor representing the prediction for each mask,
            with a shape of [batch_size, num_masks, mask_height, mask_width].
        mask_targets: a float tensor representing the binary mask of ground
            truth labels for each mask, with the same shape as `mask_outputs`.
        select_class_targets: a tensor with a shape of [batch_size, num_masks],
            representing the foreground mask targets.
        params: the dictionary including training parameters specified in
            the default_hparams function in this file.

    Returns:
        mask_loss: a float tensor representing total mask loss.
    """
    with tf.name_scope('mask_loss'):
        batch_size, num_masks, mask_height, mask_width = mask_outputs.get_shape().as_list()
        # Broadcast the per-mask foreground indicator over every pixel so that
        # background RoIs contribute no loss.
        foreground = tf.reshape(
            tf.greater(select_class_targets, 0), [batch_size, num_masks, 1, 1])
        pixel_weights = tf.cast(
            tf.tile(foreground, [1, 1, mask_height, mask_width]),
            tf.float32
        )
        loss = _sigmoid_cross_entropy(
            multi_class_labels=mask_targets,
            logits=mask_outputs,
            weights=pixel_weights,
            sum_by_non_zeros_weights=True
        )
        return params['mrcnn_weight_loss_mask'] * loss
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions to manipulate boxes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# Standard Imports
import numpy as np
import tensorflow as tf
# Upper clamp for the predicted log-scale deltas (dh, dw) before they are fed
# to tf.exp in decode_boxes, so the decoded box size is capped at
# 1000/16 = 62.5 times the anchor size and tf.exp cannot overflow.
BBOX_XFORM_CLIP = np.log(1000. / 16.)
# Number of boxes processed per tile by the tiled NMS implementation below.
NMS_TILE_SIZE = 512
def bbox_overlap(boxes, gt_boxes):
    """Calculates the IoU between proposal and ground truth boxes.

    Some `gt_boxes` may have been padded; the returned `iou` entries for
    those boxes will be -1.

    Args:
        boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
            proposals before groundtruth assignment (e.g., rpn_post_nms_topn).
            The last dimension holds pixel coordinates as [ymin, xmin, ymax, xmax].
        gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
            This tensor might have paddings with a negative value.

    Returns:
        iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
    """
    with tf.name_scope('bbox_overlap'):
        def _against_gt(t):
            # [batch, M, 1] -> [batch, 1, M] so proposals broadcast against gt.
            return tf.transpose(a=t, perm=[0, 2, 1])

        bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=boxes, num_or_size_splits=4, axis=2)
        gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=gt_boxes, num_or_size_splits=4, axis=2)

        # Intersection rectangle and its (clamped-at-zero) area.
        inter_xmin = tf.maximum(bb_x_min, _against_gt(gt_x_min))
        inter_xmax = tf.minimum(bb_x_max, _against_gt(gt_x_max))
        inter_ymin = tf.maximum(bb_y_min, _against_gt(gt_y_min))
        inter_ymax = tf.minimum(bb_y_max, _against_gt(gt_y_max))
        inter_area = tf.maximum((inter_xmax - inter_xmin), 0) * tf.maximum((inter_ymax - inter_ymin), 0)

        # Union area, with a small epsilon to avoid divide-by-zero.
        bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
        gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
        union_area = bb_area + _against_gt(gt_area) - inter_area + 1e-8

        iou = inter_area / union_area

        # Padded gt boxes carry negative coordinates, which drive the
        # intersection xmin below zero; mark those entries with -1.
        padding_mask = tf.less(inter_xmin, tf.zeros_like(inter_xmin))
        return tf.where(padding_mask, -tf.ones_like(iou), iou)
def top_k(scores, k, boxes_list):
    """A wrapper that returns the top-k scores and corresponding boxes.

    Conceptually equivalent to:
        indices = argsort(scores)[:k]
        scores = scores[indices]
        outputs = [boxes[indices, :] for boxes in boxes_list]

    Args:
        scores: a tensor with a shape of [batch_size, N]. N is the number of scores.
        k: an integer for selecting the top-k elements.
        boxes_list: a non-empty list; each element has a shape of
            [batch_size, N, 4].

    Returns:
        scores: the selected top-k scores with a shape of [batch_size, k].
        outputs: the list containing the corresponding boxes in the order of
            the input `boxes_list`.
    """
    assert isinstance(boxes_list, list)
    assert boxes_list  # not empty list
    batch_size, _ = scores.get_shape().as_list()
    scores, top_k_indices = tf.nn.top_k(scores, k=k)

    def _select(boxes):
        if batch_size == 1:
            # Single image: gather along the box axis directly.
            return tf.squeeze(tf.gather(boxes, top_k_indices, axis=1), axis=1)
        # Multiple images: flatten to [batch * N, 4], shift the per-image
        # indices into the flat layout, then gather and restore the batch dim.
        offsets = tf.range(batch_size) * tf.shape(input=boxes)[1]
        flat_indices = tf.reshape(top_k_indices + tf.expand_dims(offsets, 1), [-1])
        flat_boxes = tf.gather(tf.reshape(boxes, [-1, 4]), flat_indices)
        return tf.reshape(flat_boxes, [batch_size, -1, 4])

    return scores, [_select(boxes) for boxes in boxes_list]
def _self_suppression(iou, _, iou_sum):
    """One sweep of within-tile suppression for the tiled-NMS while_loop.

    Boxes that are themselves overlapped above 0.5 lose the ability to
    suppress others; the IoU rows of boxes suppressed by a still-active box
    are zeroed. The loop continues while the total IoU mass keeps shrinking.
    """
    batch_size = tf.shape(input=iou)[0]
    # A box may suppress others only if nothing overlaps it above 0.5.
    can_suppress_others = tf.cast(
        tf.reshape(tf.reduce_max(input_tensor=iou, axis=1) <= 0.5, [batch_size, -1, 1]),
        iou.dtype
    )
    # Zero out the IoU rows of boxes that a still-suppressing box overlaps
    # above 0.5.
    keep = tf.cast(
        tf.reduce_max(input_tensor=can_suppress_others * iou, axis=1) <= 0.5,
        iou.dtype
    )
    iou_suppressed = tf.reshape(keep, [batch_size, -1, 1]) * iou
    iou_sum_new = tf.reduce_sum(input_tensor=iou_suppressed, axis=[1, 2])
    # Iterate again while any batch element's IoU mass changed noticeably.
    keep_going = tf.reduce_any(input_tensor=iou_sum - iou_sum_new > 0.5)
    return [iou_suppressed, keep_going, iou_sum_new]
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
    """Suppresses `box_slice` by the `inner_idx`-th earlier tile of `boxes`.

    Boxes in `box_slice` that overlap any box of that earlier tile with
    IoU >= `iou_threshold` are zeroed out (turned into dots).
    """
    batch_size = tf.shape(input=boxes)[0]
    suppressing_tile = tf.slice(
        boxes,
        [0, inner_idx * NMS_TILE_SIZE, 0],
        [batch_size, NMS_TILE_SIZE, 4]
    )
    iou = bbox_overlap(suppressing_tile, box_slice)
    # Keep a box only when every overlap with the earlier tile is below the
    # threshold; broadcasting zeroes suppressed boxes across all 4 coords.
    survivors = tf.cast(
        tf.reduce_all(input_tensor=iou < iou_threshold, axis=[1]),
        box_slice.dtype
    )
    box_slice = tf.expand_dims(survivors, 2) * box_slice
    return boxes, box_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
    """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

    Args:
        boxes: a tensor with a shape of [batch_size, anchors, 4].
        iou_threshold: a float representing the threshold for deciding whether boxes
            overlap too much with respect to IOU.
        output_size: an int32 tensor of size [batch_size]. Representing the number
            of selected boxes for each batch.
        idx: an integer scalar representing induction variable.

    Returns:
        boxes: updated boxes.
        iou_threshold: pass down iou_threshold to the next iteration.
        output_size: the updated output_size.
        idx: the updated induction variable.
    """
    num_tiles = tf.shape(input=boxes)[1] // NMS_TILE_SIZE
    batch_size = tf.shape(input=boxes)[0]
    # Iterates over tiles that can possibly suppress the current tile:
    # every earlier tile (inner_idx < idx) gets a chance to zero out boxes
    # of the current slice.
    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                         [batch_size, NMS_TILE_SIZE, 4])
    _, box_slice, _, _ = tf.while_loop(
        cond=lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
        body=_cross_suppression, loop_vars=[boxes, box_slice, iou_threshold,
                                            tf.constant(0)])
    # Iterates over the current tile to compute self-suppression.
    iou = bbox_overlap(box_slice, box_slice)
    # Strictly-upper-triangular mask: a box can only be suppressed by a box
    # that comes before it (boxes are assumed sorted by score).
    mask = tf.expand_dims(
        tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
            tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
    # Keep only over-threshold overlaps; everything else cannot suppress.
    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
    suppressed_iou, _, _ = tf.while_loop(
        cond=lambda _iou, loop_condition, _iou_sum: loop_condition, body=_self_suppression,
        loop_vars=[iou, tf.constant(True),
                   tf.reduce_sum(input_tensor=iou, axis=[1, 2])])
    # A box is suppressed when any surviving earlier box still overlaps it;
    # suppressed boxes are cleared to dots (all-zero coordinates).
    suppressed_box = tf.reduce_sum(input_tensor=suppressed_iou, axis=1) > 0
    box_slice *= tf.expand_dims(1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)
    # Uses box_slice to update the input boxes: only the idx-th tile is
    # replaced; the other tiles pass through unchanged via the 0/1 mask.
    mask = tf.reshape(
        tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype), [1, -1, 1, 1])
    boxes = tf.tile(tf.expand_dims(
        box_slice, [1]), [1, num_tiles, 1, 1]) * mask + tf.reshape(
            boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (1 - mask)
    boxes = tf.reshape(boxes, [batch_size, -1, 4])
    # Updates output_size: count the boxes of this tile that were not cleared.
    output_size += tf.reduce_sum(
        input_tensor=tf.cast(tf.reduce_any(input_tensor=box_slice > 0, axis=[2]), tf.int32), axis=[1])
    return boxes, iou_threshold, output_size, idx + 1
def sorted_non_max_suppression_padded(scores,
                                      boxes,
                                      max_output_size,
                                      iou_threshold):
    """A wrapper that handles non-maximum suppression.

    Assumption:
        * The boxes are sorted by scores unless the box is a dot (all coordinates
          are zero).
        * Boxes with higher scores can be used to suppress boxes with lower scores.

    The overall design of the algorithm is to handle boxes tile-by-tile:

    boxes = boxes.pad_to_multiple_of(tile_size)
    num_tiles = len(boxes) // tile_size
    output_boxes = []
    for i in range(num_tiles):
        box_tile = boxes[i*tile_size : (i+1)*tile_size]
        for j in range(i - 1):
            suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
            iou = bbox_overlap(box_tile, suppressing_tile)
            # if the box is suppressed in iou, clear it to a dot
            box_tile *= _update_boxes(iou)
        # Iteratively handle the diagonal tile.
        iou = _box_overlap(box_tile, box_tile)
        iou_changed = True
        while iou_changed:
            # boxes that are not suppressed by anything else
            suppressing_boxes = _get_suppressing_boxes(iou)
            # boxes that are suppressed by suppressing_boxes
            suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
            # clear iou to 0 for boxes that are suppressed, as they cannot be
            # used to suppress other boxes any more
            new_iou = _clear_iou(iou, suppressed_boxes)
            iou_changed = (new_iou != iou)
            iou = new_iou
        # remaining boxes that can still suppress others, are selected boxes.
        output_boxes.append(_get_suppressing_boxes(iou))
        if len(output_boxes) >= max_output_size:
            break

    Args:
        scores: a tensor with a shape of [batch_size, anchors].
        boxes: a tensor with a shape of [batch_size, anchors, 4].
        max_output_size: a scalar integer `Tensor` representing the maximum number
            of boxes to be selected by non max suppression.
        iou_threshold: a float representing the threshold for deciding whether boxes
            overlap too much with respect to IOU.

    Returns:
        nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
            dtype as input scores.
        nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
            same dtype as input boxes.
    """
    batch_size = tf.shape(input=boxes)[0]
    num_boxes = tf.shape(input=boxes)[1]
    # Pad the box count up to a multiple of NMS_TILE_SIZE so the loop body can
    # slice fixed-size tiles.
    pad = tf.cast(
        tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
        tf.int32) * NMS_TILE_SIZE - num_boxes
    boxes = tf.pad(tensor=tf.cast(boxes, tf.float32), paddings=[[0, 0], [0, pad], [0, 0]])
    scores = tf.pad(tensor=tf.cast(scores, tf.float32), paddings=[[0, 0], [0, pad]])
    num_boxes += pad

    def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
        # Stop early once every batch element has max_output_size survivors,
        # or when all tiles have been processed.
        return tf.logical_and(
            tf.reduce_min(input_tensor=output_size) < max_output_size,
            idx < num_boxes // NMS_TILE_SIZE)

    selected_boxes, _, output_size, _ = tf.while_loop(
        cond=_loop_cond, body=_suppression_loop_body, loop_vars=[
            boxes, iou_threshold,
            tf.zeros([batch_size], tf.int32),
            tf.constant(0)
        ])
    # Surviving (non-dot) boxes keep their original order: top_k over a
    # descending ramp masked by survivorship yields their indices front-first.
    idx = num_boxes - tf.cast(
        tf.nn.top_k(
            tf.cast(tf.reduce_any(input_tensor=selected_boxes > 0, axis=[2]), tf.int32) *
            tf.expand_dims(tf.range(num_boxes, 0, -1), 0), max_output_size)[0],
        tf.int32)
    idx = tf.minimum(idx, num_boxes - 1)
    # Convert per-image indices into flat indices for the reshaped tensors.
    idx = tf.reshape(
        idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])
    boxes = tf.reshape(
        tf.gather(tf.reshape(boxes, [-1, 4]), idx),
        [batch_size, max_output_size, 4])
    # Zero out slots beyond each image's actual number of selected boxes.
    boxes = boxes * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
            output_size, [-1, 1, 1]), boxes.dtype)
    scores = tf.reshape(
        tf.gather(tf.reshape(scores, [-1, 1]), idx),
        [batch_size, max_output_size])
    scores = scores * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
            output_size, [-1, 1]), scores.dtype)
    return scores, boxes
def encode_boxes(boxes, anchors, weights=None):
    """Encode boxes into anchor-relative regression targets.

    Args:
        boxes: a tensor whose last dimension is 4 representing the coordinates
            of boxes in ymin, xmin, ymax, xmax order.
        anchors: a tensor whose shape is the same as `boxes` representing the
            coordinates of anchors in ymin, xmin, ymax, xmax order.
        weights: None or a list of four float numbers used to scale coordinates.

    Returns:
        encoded_boxes: a tensor whose shape is the same as `boxes` representing
            the encoded box targets in dy, dx, dh, dw order.
    """
    with tf.name_scope('encode_box'):
        boxes = tf.cast(boxes, dtype=anchors.dtype)

        def _center_size(ymin, xmin, ymax, xmax):
            # Pixel-coordinate convention: a box spanning [min, max] covers
            # max - min + 1 pixels.
            h = ymax - ymin + 1.0
            w = xmax - xmin + 1.0
            return ymin + 0.5 * h, xmin + 0.5 * w, h, w

        box_yc, box_xc, box_h, box_w = _center_size(*tf.split(boxes, 4, axis=-1))
        anchor_yc, anchor_xc, anchor_h, anchor_w = _center_size(*tf.split(anchors, 4, axis=-1))

        # Center offsets are measured in anchor sizes; sizes are log-ratios.
        encoded_dy = (box_yc - anchor_yc) / anchor_h
        encoded_dx = (box_xc - anchor_xc) / anchor_w
        encoded_dh = tf.math.log(box_h / anchor_h)
        encoded_dw = tf.math.log(box_w / anchor_w)
        if weights:
            encoded_dy *= weights[0]
            encoded_dx *= weights[1]
            encoded_dh *= weights[2]
            encoded_dw *= weights[3]
        return tf.concat([encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1)
def decode_boxes(encoded_boxes, anchors, weights=None):
    """Decode anchor-relative regression targets back to box coordinates.

    Args:
        encoded_boxes: a tensor whose last dimension is 4 representing the
            encoded boxes in dy, dx, dh, dw order.
        anchors: a tensor whose shape is the same as `encoded_boxes`
            representing the coordinates of anchors in ymin, xmin, ymax, xmax
            order.
        weights: None or a list of four float numbers used to scale coordinates.

    Returns:
        decoded_boxes: a tensor whose shape is the same as `encoded_boxes`
            representing the decoded boxes in ymin, xmin, ymax, xmax order.
    """
    with tf.name_scope('decode_box'):
        encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
        dy, dx, dh, dw = tf.split(encoded_boxes, 4, axis=-1)
        if weights:
            dy /= weights[0]
            dx /= weights[1]
            dh /= weights[2]
            dw /= weights[3]
        # Clamp the log-scale deltas so tf.exp cannot overflow; this caps the
        # decoded size at (1000 / 16) times the anchor size.
        dh = tf.minimum(dh, BBOX_XFORM_CLIP)
        dw = tf.minimum(dw, BBOX_XFORM_CLIP)

        anchor_ymin, anchor_xmin, anchor_ymax, anchor_xmax = tf.split(anchors, 4, axis=-1)
        # Pixel-coordinate convention: a box spanning [min, max] covers
        # max - min + 1 pixels.
        anchor_h = anchor_ymax - anchor_ymin + 1.0
        anchor_w = anchor_xmax - anchor_xmin + 1.0
        anchor_yc = anchor_ymin + 0.5 * anchor_h
        anchor_xc = anchor_xmin + 0.5 * anchor_w

        decoded_h = tf.exp(dh) * anchor_h
        decoded_w = tf.exp(dw) * anchor_w
        decoded_ymin = dy * anchor_h + anchor_yc - 0.5 * decoded_h
        decoded_xmin = dx * anchor_w + anchor_xc - 0.5 * decoded_w
        decoded_ymax = decoded_ymin + decoded_h - 1.0
        decoded_xmax = decoded_xmin + decoded_w - 1.0
        return tf.concat(
            [decoded_ymin, decoded_xmin, decoded_ymax, decoded_xmax],
            axis=-1
        )
def clip_boxes(boxes, height, width):
    """Clip box coordinates to the image boundary.

    Args:
        boxes: a tensor whose last dimension is 4 representing the coordinates
            of boxes in ymin, xmin, ymax, xmax order.
        height: an integer, a scalar or a tensor such that all but the last
            dimensions are the same as `boxes`, with last dimension 1. It
            represents the height of the image.
        width: same as `height`, but representing the width of the image.

    Returns:
        clipped_boxes: a tensor whose shape is the same as `boxes` representing
            the clipped boxes.
    """
    with tf.name_scope('clip_box'):
        y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)

        def _clamp(coord, upper):
            # Restrict a coordinate to the valid pixel range [0, upper - 1].
            return tf.maximum(tf.minimum(coord, upper - 1.0), 0.0)

        return tf.concat(
            [_clamp(y_min, height), _clamp(x_min, width),
             _clamp(y_max, height), _clamp(x_max, width)],
            axis=-1
        )
def filter_boxes(boxes, scores, min_size, height, width, scale):
    """Filter out boxes that are too small or whose center lies off-image.

    Args:
        boxes: a tensor whose last dimension is 4 representing the coordinates
            of boxes in ymin, xmin, ymax, xmax order.
        scores: a tensor such that all but the last dimensions are the same as
            `boxes`, with last dimension 1. It represents the scores.
        min_size: an integer specifying the minimal size.
        height: an integer, a scalar or a tensor such that all but the last
            dimensions are the same as `boxes`, with last dimension 1. It
            represents the height of the image.
        width: same as `height`, but representing the width of the image.
        scale: same as `height`, but representing the scale of the image.

    Returns:
        filtered_boxes: a tensor whose shape is the same as `boxes`; rejected
            boxes are zeroed out.
        filtered_scores: a tensor whose shape is the same as `scores`; rejected
            scores are zeroed out.
    """
    with tf.name_scope('filter_box'):
        y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
        # Sizes and centers under the "+1 pixel" coordinate convention.
        h = y_max - y_min + 1.0
        w = x_max - x_min + 1.0
        yc = y_min + h / 2.0
        xc = x_min + w / 2.0

        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        scale = tf.cast(scale, dtype=boxes.dtype)
        min_size = tf.cast(tf.maximum(min_size, 1), dtype=boxes.dtype)

        # Keep a box only when both sides reach the scaled minimum size and
        # its center lies inside the image.
        big_enough = tf.logical_and(
            tf.greater_equal(h, min_size * scale),
            tf.greater_equal(w, min_size * scale)
        )
        center_inside = tf.logical_and(tf.less(yc, height), tf.less(xc, width))
        keep = tf.logical_and(big_enough, center_inside)

        filtered_scores = tf.where(keep, scores, tf.zeros_like(scores))
        filtered_boxes = tf.cast(keep, dtype=boxes.dtype) * boxes
        return filtered_boxes, filtered_scores
def to_normalized_coordinates(boxes, height, width):
    """Convert absolute box coordinates to normalized ones.

    Args:
        boxes: a tensor whose last dimension is 4 representing the coordinates
            of boxes in ymin, xmin, ymax, xmax order.
        height: an integer, a scalar or a tensor such that all but the last
            dimensions are the same as `boxes`, with last dimension 1. It
            represents the height of the image.
        width: same as `height`, but representing the width of the image.

    Returns:
        normalized_boxes: a tensor whose shape is the same as `boxes`
            representing the boxes in normalized coordinates.
    """
    with tf.name_scope('normalize_box'):
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
        # y coordinates are scaled by the height, x coordinates by the width.
        return tf.concat(
            [y_min / height, x_min / width, y_max / height, x_max / width],
            axis=-1
        )
def to_absolute_coordinates(boxes, height, width):
    """Convert normalized box coordinates to absolute ones.

    Args:
        boxes: a tensor whose last dimension is 4 representing the coordinates
            of boxes in ymin, xmin, ymax, xmax order.
        height: an integer, a scalar or a tensor such that all but the last
            dimensions are the same as `boxes`, with last dimension 1. It
            represents the height of the image.
        width: same as `height`, but representing the width of the image.

    Returns:
        absolute_boxes: a tensor whose shape is the same as `boxes`
            representing the boxes in absolute coordinates.
    """
    with tf.name_scope('denormalize_box'):
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        y_min, x_min, y_max, x_max = tf.split(boxes, 4, axis=-1)
        # y coordinates are scaled by the height, x coordinates by the width.
        return tf.concat(
            [y_min * height, x_min * width, y_max * height, x_max * width],
            axis=-1
        )
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions to manipulate masks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pycocotools.mask as coco_mask
# Sentinel values used to flatten variable-length polygon lists into a single
# padded 1-D array (see _np_array_split / _unflat_polygons below).
POLYGON_PAD_VALUE = -3  # padding value — usage not visible in this file chunk; TODO confirm against the dataloader
POLYGON_SEPARATOR = -1  # separates polygons belonging to one object
MASK_SEPARATOR = -2     # separates objects (masks)
def _np_array_split(a, v):
"""Split numpy array by separator value.
Args:
a: 1-D numpy.array.
v: number. Separator value. e.g -1.
Returns:
2-D list of clean separated arrays.
Example:
a = [1, 2, 3, 4, -1, 5, 6, 7, 8]
b = _np_array_split(a, -1)
# Output: b = [[1, 2, 3, 4], [5, 6, 7, 8]]
"""
a = np.array(a)
arrs = np.split(a, np.where(a[:] == v)[0])
return [e if (len(e) <= 0 or e[0] != v) else e[1:] for e in arrs]
def _unflat_polygons(x):
    """Unflats/recovers a 1-d padded polygon array into a 3-d polygon list.

    Args:
        x: numpy.array with shape [num_elements, 1], where num_elements =
            num_obj * num_vertex + padding.

    Returns:
        A list with three levels of nesting: [#obj, #polygon, #vertex].
    """
    # Outer split: MASK_SEPARATOR delimits objects; inner split:
    # POLYGON_SEPARATOR delimits the polygons within one object.
    objects = [
        _np_array_split(segment, POLYGON_SEPARATOR)
        for segment in _np_array_split(x, MASK_SEPARATOR)
    ]
    # pylint: disable=g-complex-comprehension
    return [[polygon.tolist() for polygon in obj] for obj in objects]
def _denormalize_to_coco_bbox(bbox, height, width):
"""Denormalize bounding box.
Args:
bbox: numpy.array[float]. Normalized bounding box. Format: ['ymin', 'xmin',
'ymax', 'xmax'].
height: int. image height.
width: int. image width.
Returns:
[x, y, width, height]
"""
y1, x1, y2, x2 = bbox
y1 *= height
x1 *= width
y2 *= height
x2 *= width
box_height = y2 - y1
box_width = x2 - x1
return [float(x1), float(y1), float(box_width), float(box_height)]
def _extract_image_info(prediction, b):
return {
'id': int(prediction['source_id'][b]),
'width': int(prediction['width'][b]),
'height': int(prediction['height'][b]),
}
def _extract_bbox_annotation(prediction, b, obj_i):
    """Constructs a COCO format bounding box annotation for one object.

    Args:
        prediction: dict of batched model/groundtruth tensors.
        b: batch index.
        obj_i: object index within the batch element.
    Returns:
        dict with COCO annotation fields ('id', 'image_id', 'category_id',
        'bbox', 'iscrowd', 'area', 'segmentation').
    """
    img_height = prediction['height'][b]
    img_width = prediction['width'][b]
    bbox = _denormalize_to_coco_bbox(
        prediction['groundtruth_boxes'][b][obj_i, :], img_height, img_width)
    if 'groundtruth_area' in prediction:
        area = float(prediction['groundtruth_area'][b][obj_i])
    else:
        # Using the box area to replace the polygon area. This value will not
        # affect real evaluation but may fail the unit test.
        area = bbox[2] * bbox[3]
    annotation = dict()
    annotation['id'] = b * 1000 + obj_i  # place holder of annotation id.
    annotation['image_id'] = int(prediction['source_id'][b])  # source_id
    annotation['category_id'] = int(prediction['groundtruth_classes'][b][obj_i])
    annotation['bbox'] = bbox
    annotation['iscrowd'] = int(prediction['groundtruth_is_crowd'][b][obj_i])
    annotation['area'] = area
    annotation['segmentation'] = []
    return annotation
def _extract_polygon_info(prediction, polygons, b, obj_i):
    """Constructs 'area' and 'segmentation' fields for one object.

    Args:
        prediction: dict[str, numpy.array]. Model outputs. The value dimension
            is [batch_size, #objects, #features, ...]
        polygons: list[list[list]]. Dimensions are [#objects, #polygon,
            #vertex].
        b: batch index.
        obj_i: object index.
    Returns:
        dict[str, numpy.array]. COCO format annotation with 'area' and
        'segmentation'.
    """
    annotation = {}
    if 'groundtruth_area' in prediction:
        groundtruth_area = float(prediction['groundtruth_area'][b][obj_i])
    else:
        # No precomputed area: derive it from the polygons via RLE encoding.
        img_height = prediction['height'][b]
        img_width = prediction['width'][b]
        rles = coco_mask.frPyObjects(polygons[obj_i], img_height, img_width)
        groundtruth_area = coco_mask.area(rles)
    annotation['area'] = groundtruth_area
    annotation['segmentation'] = polygons[obj_i]
    # Add dummy polygon to is_crowd instance.
    if not annotation['segmentation'][0]:
        # Adds a dummy polygon in case there is no segmentation.
        # Note that this could affect eval number in a very tiny amount since
        # for the instance without masks, it creates a fake single pixel mask
        # in the center of the box.
        img_height = prediction['height'][b]
        img_width = prediction['width'][b]
        x0, y0, box_w, box_h = _denormalize_to_coco_bbox(
            prediction['groundtruth_boxes'][b][obj_i, :], img_height, img_width)
        xcenter = x0 + box_w / 2.0
        ycenter = y0 + box_h / 2.0
        annotation['segmentation'] = [[xcenter, ycenter] * 4]
    return annotation
def _extract_categories(annotations):
"""Extract categories from annotations."""
categories = {}
for anno in annotations:
category_id = int(anno['category_id'])
categories[category_id] = {'id': category_id}
return list(categories.values())
def extract_coco_groundtruth(prediction, include_mask=False):
    """Extract COCO format groundtruth.

    Args:
        prediction: dictionary of batch of prediction result. the first
            dimension each element is the batch.
        include_mask: True for including masks in the output annotations.
    Returns:
        Tuple of (images, annotations).
        images: list[dict]. Required keys: 'id', 'width' and 'height'. The
            values are image id, width and height.
        annotations: list[dict]. Required keys: {'id', 'source_id',
            'category_id', 'bbox', 'iscrowd'} when include_mask=False. If
            include_mask=True, also required {'area', 'segmentation'}. The
            'id' value is the annotation id and can be any **positive**
            number (>=1).
        Refer to http://cocodataset.org/#format-data for more details.
    Raises:
        ValueError: If any groundtruth fields is missing.
    """
    required_fields = [
        'source_id', 'width', 'height', 'num_groundtruth_labels',
        'groundtruth_boxes', 'groundtruth_classes'
    ]
    if include_mask:
        required_fields += ['groundtruth_polygons', 'groundtruth_area']
    for field in required_fields:
        if field not in prediction.keys():
            raise ValueError('Missing groundtruth field: "{}" keys: {}'.format(
                field, prediction.keys()))
    images = []
    annotations = []
    batch_size = prediction['source_id'].shape[0]
    for batch_idx in range(batch_size):
        # Constructs image info.
        images.append(_extract_image_info(prediction, batch_idx))
        polygons = None
        if include_mask:
            # Remove padding, then recover the [#obj, #polygon, #vertex] list.
            padded = prediction['groundtruth_polygons'][batch_idx]
            pad_positions = np.where(padded[:] == POLYGON_PAD_VALUE)[0]
            polygons = _unflat_polygons(np.delete(padded, pad_positions))
        # Constructs annotations.
        for obj_idx in range(prediction['num_groundtruth_labels'][batch_idx]):
            annotation = _extract_bbox_annotation(prediction, batch_idx, obj_idx)
            if include_mask:
                annotation.update(
                    _extract_polygon_info(prediction, polygons, batch_idx, obj_idx))
            annotations.append(annotation)
    return images, annotations
def create_coco_format_dataset(images,
                               annotations,
                               regenerate_annotation_id=True):
    """Creates COCO format dataset with COCO format images and annotations."""
    if regenerate_annotation_id:
        # WARNING: The annotation id must be positive, hence start=1.
        for new_id, annotation in enumerate(annotations, start=1):
            annotation['id'] = new_id
    return {
        'images': images,
        'annotations': annotations,
        'categories': _extract_categories(annotations),
    }
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import atexit
import functools
import inspect
import signal
import wrapt
__all__ = ["atexit_hook"]
# Module-level bookkeeping shared by register_atexit_fn / atexit_hook.
_executed_exit_fns = set()  # exit functions that already ran (run-at-most-once guard)
_registered_exit_fns = set()  # callables already registered with atexit/signal
_registered_objects = set()  # hex(id(obj)) of instances whose __atexit__ is hooked
def register_atexit_fn(fun=None, signals=None, logfun=lambda s: print(s, file=sys.stderr)):
    """Register a function which will be executed on "normal"
    interpreter exit or in case one of the `signals` is received
    by this process (differently from atexit.register()).
    Also, it makes sure to execute any other function which was
    previously registered via signal.signal(). If any, it will be
    executed after our own `fun`.
    Functions which were already registered or executed via this
    function will be ignored.
    Note: there's no way to escape SIGKILL, SIGSTOP or os._exit(0)
    so don't bother trying.
    You can use this either as a function or as a decorator:
        @register_atexit_fn
        def cleanup():
            pass
        # ...or
        register_atexit_fn(cleanup)
    Note about Windows: I tested this some time ago and didn't work
    exactly the same as on UNIX, then I didn't care about it
    anymore and didn't test since then so may not work on Windows.
    Parameters:
    - fun: a callable
    - signals: a list of signals for which this function will be
      executed (default SIGTERM)
    - logfun: a logging function which is called when a signal is
      received. Default: print to standard error. May be set to
      None if no logging is desired.
    Returns:
    - the callable `fun` itself when used directly or as a bare decorator,
      or a decorator when called with `fun=None`
      (i.e. `@register_atexit_fn(signals=...)`).
    """
    '''
    Source: https://github.com/torvalds/linux/blob/master/include/linux/signal.h
    * +--------------------+-----------------+
    * | POSIX signal | default action |
    * +--------------------+-----------------+
    * | SIGHUP | terminate |
    * | SIGINT | terminate |
    * | SIGQUIT | coredump |
    * | SIGILL | coredump |
    * | SIGTRAP | coredump |
    * | SIGABRT/SIGIOT | coredump |
    * | SIGBUS | coredump |
    * | SIGFPE | coredump |
    * | SIGKILL | terminate(+) |
    * | SIGUSR1 | terminate |
    * | SIGSEGV | coredump |
    * | SIGUSR2 | terminate |
    * | SIGPIPE | terminate |
    * | SIGALRM | terminate |
    * | SIGTERM | terminate |
    * | SIGCHLD | ignore |
    * | SIGCONT | ignore(*) |
    * | SIGSTOP | stop(*)(+) |
    * | SIGTSTP | stop(*) |
    * | SIGTTIN | stop(*) |
    * | SIGTTOU | stop(*) |
    * | SIGURG | ignore |
    * | SIGXCPU | coredump |
    * | SIGXFSZ | coredump |
    * | SIGVTALRM | terminate |
    * | SIGPROF | terminate |
    * | SIGPOLL/SIGIO | terminate |
    * | SIGSYS/SIGUNUSED | coredump |
    * | SIGSTKFLT | terminate |
    * | SIGWINCH | ignore |
    * | SIGPWR | terminate |
    * | SIGRTMIN-SIGRTMAX | terminate |
    * +--------------------+-----------------+
    * | non-POSIX signal | default action |
    * +--------------------+-----------------+
    * | SIGEMT | coredump |
    * +--------------------+-----------------+
    '''
    if signals is None:
        signals = [signal.SIGTERM]

    def stringify_sig(signum):
        # Python < 3.5 delivers signals as plain ints; map them back to their
        # "SIG*" names so log messages stay readable. On >= 3.5 the
        # signal.Signals enum already reprs with its name.
        if sys.version_info < (3, 5):
            smap = dict([(getattr(signal, x), x) for x in dir(signal) if x.startswith('SIG')])
            return smap.get(signum, signum)
        else:
            return signum

    def fun_wrapper():
        # Run `fun` at most once, even if triggered both by a signal and by
        # normal atexit processing.
        if fun not in _executed_exit_fns:
            try:
                fun()
            finally:
                _executed_exit_fns.add(fun)

    def signal_wrapper(signum=None, frame=None):
        if signum is not None:
            if logfun is not None:
                logfun("signal {} received by process with PID {}".format(stringify_sig(signum), os.getpid()))
        fun_wrapper()
        # Only return the original signal this process was hit with
        # in case fun returns with no errors, otherwise process will
        # return with sig 1.
        if signum is not None:
            if signum == signal.SIGINT:
                raise KeyboardInterrupt
            # XXX - should we do the same for SIGTERM / SystemExit?
            sys.exit(signum)

    def register_fun(fun, signals):
        # Install `signal_wrapper` for every requested signal, and keep any
        # previously installed handler alive by re-registering it via atexit
        # (so it runs after our `fun`).
        if not callable(fun):
            raise TypeError("{!r} is not callable".format(fun))
        set([fun])  # raise exc if obj is not hash-able
        signals = set(signals)
        for sig in signals:
            # Register function for this signal and pop() the previously
            # registered one (if any). This can either be a callable,
            # SIG_IGN (ignore signal) or SIG_DFL (perform default action
            # for signal).
            old_handler = signal.signal(sig, signal_wrapper)
            if old_handler not in (signal.SIG_DFL, signal.SIG_IGN):
                # ...just for extra safety.
                if not callable(old_handler):
                    continue
                # This is needed otherwise we'll get a KeyboardInterrupt
                # strace on interpreter exit, even if the process exited
                # with sig 0.
                if (sig == signal.SIGINT and old_handler is signal.default_int_handler):
                    continue
                # There was a function which was already registered for this
                # signal. Register it again so it will get executed (after our
                # new fun).
                if old_handler not in _registered_exit_fns:
                    atexit.register(old_handler)
                    _registered_exit_fns.add(old_handler)
        # This further registration will be executed in case of clean
        # interpreter exit (no signals received).
        if fun not in _registered_exit_fns or not signals:
            atexit.register(fun_wrapper)
            _registered_exit_fns.add(fun)

    # This piece of machinery handles 3 usage cases. register_atexit_fn()
    # used as:
    # - a function
    # - a decorator without parentheses
    # - a decorator with parentheses
    if fun is None:
        # BUGFIX: the previous implementation applied a bare
        # `@functools.wraps` (missing its `wrapped` argument) to `outer` and
        # returned `register_fun(fun, signals)` (which is None), so the
        # parenthesized-decorator form never registered anything and replaced
        # the decorated function. Register and return the function unchanged.
        def outer(fun):
            register_fun(fun, signals)
            return fun
        return outer
    else:
        register_fun(fun, signals)
        return fun
def atexit_hook(*args, **kwargs):
    """Decorator that wires the decorated object's `__atexit__` method into
    `register_atexit_fn`, so it runs on SIGTERM/SIGINT or normal exit.

    The wrapped callable (class, function, classmethod or instancemethod) is
    invoked immediately; the resulting object's bound `__atexit__` is
    registered at most once per object id.
    """
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        # Fail fast if the decorated object cannot provide an __atexit__ hook.
        if not hasattr(wrapped, "__atexit__"):
            raise AttributeError("The class `%s` does not have an `__atexit__` method" % wrapped.__name__)
        def _func():
            # All four branches currently perform the same call; they are kept
            # separate to document which decoration target each case handles.
            if instance is None:
                if inspect.isclass(wrapped):
                    # Decorator was applied to a class.
                    return wrapped(*args, **kwargs)
                else:
                    # Decorator was applied to a function or staticmethod.
                    return wrapped(*args, **kwargs)
            else:
                if inspect.isclass(instance):
                    # Decorator was applied to a classmethod.
                    return wrapped(*args, **kwargs)
                else:
                    # Decorator was applied to an instancemethod.
                    return wrapped(*args, **kwargs)
        _impl = _func()
        # NOTE(review): keying on hex(id(...)) can collide if an earlier object
        # is garbage-collected and its address is reused — confirm this is
        # acceptable for the singleton use case.
        object_id = hex(id(_impl))
        if object_id not in _registered_objects:
            register_atexit_fn(fun=_impl.__atexit__, signals=[signal.SIGTERM, signal.SIGINT])
            _registered_objects.add(object_id)
        return _impl
    return wrapper(*args, **kwargs)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__all__ = ["MPI_local_rank", "MPI_rank", "MPI_size", "MPI_rank_and_size", "MPI_is_distributed"]
def MPI_is_distributed():
    """Return a boolean whether a distributed training/inference runtime is being used.
    :return: bool
    """
    ompi_vars = ("OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE")
    slurm_vars = ("SLURM_PROCID", "SLURM_NTASKS")
    # Distributed iff either the OpenMPI or the SLURM variable pair is set.
    if all(var in os.environ for var in ompi_vars):
        return True
    if all(var in os.environ for var in slurm_vars):
        return True
    return False
def MPI_local_rank():
    """Return the node-local rank of this process (0 when not distributed).

    OpenMPI's variable takes precedence over SLURM's, matching the original
    lookup order.
    """
    for var in ("OMPI_COMM_WORLD_LOCAL_RANK", "SLURM_LOCALID"):
        if var in os.environ:
            return int(os.environ.get(var))
    return 0
def MPI_rank():
    """Return the global MPI rank of this process (0 when not distributed)."""
    rank, _ = MPI_rank_and_size()
    return rank
def MPI_size():
    """Return the total number of MPI processes (1 when not distributed)."""
    _, size = MPI_rank_and_size()
    return size
def MPI_rank_and_size():
    """Return (rank, size), consulting MPI env vars only once TensorFlow has
    been imported; otherwise report the single-process default (0, 1)."""
    if "tensorflow" not in sys.modules:
        return 0, 1
    return mpi_env_MPI_rank_and_size()
# Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
def mpi_env_MPI_rank_and_size():
    """Get MPI rank and size from environment variables and return them as a
    tuple of integers.
    Most MPI implementations have an `mpirun` or `mpiexec` command that will
    run an MPI executable and set up all communication necessary between the
    different processors. As part of that set up, they will set environment
    variables that contain the rank and size of the MPI_COMM_WORLD
    communicator.
    Since MPI is just a standard, not an implementation, implementations
    typically choose their own environment variable names. This function
    checks the variables used by MPICH/Hydra (PMI), SLURM and OpenMPI.
    If this is not running under MPI, then defaults of rank zero and size one
    are returned. (This is appropriate because when you call MPI_Init in an
    application not started with mpirun, it will create a new independent
    communicator with only one process in it.)
    Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
    """
    env_var_pairs = (
        ('PMI_RANK', 'PMI_SIZE'),
        ('SLURM_PROCID', 'SLURM_NTASKS'),
        ('OMPI_COMM_WORLD_RANK', 'OMPI_COMM_WORLD_SIZE'),
    )
    for rank_var, size_var in env_var_pairs:
        rank = os.environ.get(rank_var)
        size = os.environ.get(size_var)
        if rank is None or size is None:
            continue
        return int(rank), int(size)
    # Default to rank zero and size one if there are no environment variables
    return 0, 1
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2006-2011, NIPY Developers
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * Neither the name of the NIPY Developers nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Source: https://github.com/nipy/nitime/blob/c8eb314/nitime/lazyimports.py
"""This module provides lazy import functionality to improve the import
performance of nitime. For example, some parts of nitime leverage and import
matplotlib, which is quite a big package, yet most of the nitime code does not
depend on matplotlib. By lazily-loading a module, we defer the overhead of
importing it until the first time it is actually used, thereby speeding up
nitime imports.
A generic :class:`LazyImport` class is implemented which takes the module name
as a parameter, and acts as a proxy for that module, importing it only when
the module is used, but effectively acting as the module in every other way
(including inside IPython with respect to introspection and tab completion)
with the *exception* of reload() - reloading a :class:`LazyImport` raises an
:class:`ImportError`.
Commonly used nitime lazy imports are also defined in :mod:`nitime.lazy`, so
they can be reused throughout nitime.
"""
import os
import sys
import types
class LazyImport(types.ModuleType):
    """
    This class takes the module name as a parameter, and acts as a proxy for
    that module, importing it only when the module is used, but effectively
    acting as the module in every other way (including inside IPython with
    respect to introspection and tab completion) with the *exception* of
    reload()- reloading a :class:`LazyImport` raises an :class:`ImportError`.
    >>> mlab = LazyImport('matplotlib.mlab')
    No import happens on the above line, until we do something like call an
    ``mlab`` method or try to do tab completion or introspection on ``mlab``
    in IPython.
    >>> mlab
    <module 'matplotlib.mlab' will be lazily loaded>
    Now the :class:`LazyImport` will do an actual import, and call the dist
    function of the imported module.
    >>> mlab.dist(1969,2011)
    42.0
    """
    def __getattribute__(self, x):
        # This method will be called only once, since we'll change
        # self.__class__ to LoadedLazyImport, and __getattribute__ will point
        # to module.__getattribute__
        name = object.__getattribute__(self, '__name__')
        __import__(name)
        # if name above is 'package.foo.bar', package is returned, the docs
        # recommend that in order to get back the full thing, that we import
        # and then lookup the full name is sys.modules, see:
        # http://docs.python.org/library/functions.html#__import__
        module = sys.modules[name]
        # Now that we've done the import, cutout the middleman and make self
        # act as the imported module
        class LoadedLazyImport(types.ModuleType):
            # Delegates everything to the real module object, so all future
            # attribute access bypasses this proxy entirely.
            __getattribute__ = module.__getattribute__
            __repr__ = module.__repr__
        object.__setattr__(self, '__class__', LoadedLazyImport)
        # The next line will make "reload(l)" a silent no-op
        return module.__getattribute__(x)
    def __repr__(self):
        # Shown only before the first attribute access triggers the real
        # import (afterwards the real module's __repr__ takes over).
        return "<module '%s' will be lazily loaded>" % object.__getattribute__(self, '__name__')
# When building docs on ReadTheDocs, replace LazyImport with an eager proxy so
# Sphinx can introspect the wrapped modules normally.
if 'READTHEDOCS' in os.environ:
    lazy_doc = """
    WARNING: To get Sphinx documentation to build we disable
    LazyImports, which makes Sphinx incorrectly report this
    class as having a base class of object. In reality,
    :class:`LazyImport`'s base class is
    :class:`types.ModuleType`.
    """
    lazy_doc += LazyImport.__doc__
    class LazyImport(object):
        # Eager stand-in: imports immediately in __init__ and forwards all
        # attribute access to the real module.
        __doc__ = lazy_doc
        def __init__(self, x):
            __import__(x)
            self.module = sys.modules[x]
        def __getattr__(self, x):
            return self.module.__getattribute__(x)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import inspect
import operator
import six
import subprocess
import time
from enum import Enum
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils.decorators import atexit_hook
from mask_rcnn.utils.metaclasses import SingletonMetaClass
from mask_rcnn.utils.meters import ACCEPTED_INT_NUMBER_FORMATS
from mask_rcnn.utils.meters import ACCEPTED_FLOAT_NUMBER_FORMATS
import dllogger
from dllogger import Verbosity
__all__ = ["LoggingBackend", "LoggingScope", "DistributedStrategy", "RuntimeMode"]
class _BaseEnum(Enum):
@classmethod
def __values__(cls):
return [getattr(cls, m.name) for m in cls]
class LoggingScope(_BaseEnum):
    """Granularity at which metrics are logged."""
    ITER = 'Iteration'
    # NOTE(review): 'AllReduce' reads like a copy/paste slip (it matches
    # DistributedStrategy.REDUCE_MEAN below); one would expect something like
    # 'Epoch' here. The string is runtime behavior — verify against consumers
    # before changing it.
    EPOCH = 'AllReduce'
class DistributedStrategy(_BaseEnum):
    """How metric values are combined across distributed workers."""
    # NOTE(review): REDUCE_SUM == 'AllGather' and REDUCE_MEAN == 'AllReduce'
    # look like mismatched name/value pairs — confirm against the code that
    # consumes these strings before changing them.
    REDUCE_SUM = 'AllGather'
    REDUCE_MEAN = 'AllReduce'
    NONE = None
class RuntimeMode(_BaseEnum):
    """Execution phases accepted by validate_runtime_mode()."""
    TRAIN = 'train'
    INFERENCE = 'inference'
    VALIDATION = 'validation'
    TEST = 'test'
def validate_runtime_mode(requested_mode):
    """Raise ValueError unless `requested_mode` is one of the RuntimeMode
    members; return None on success."""
    # Collect the non-routine, non-dunder attributes of RuntimeMode, i.e. its
    # enum members, as (name, member) pairs.
    members = inspect.getmembers(RuntimeMode, lambda a: not inspect.isroutine(a))
    authorized_modes = [
        (name, mode) for name, mode in members
        if not (name.startswith('__') and name.endswith('__'))
    ]
    if any(mode == requested_mode for _, mode in authorized_modes):
        return
    raise ValueError(
        "Unknown requested mode: `%s` - Authorized: %s" % (requested_mode, [name for name, _ in authorized_modes])
    )
@atexit_hook
@six.add_metaclass(SingletonMetaClass)
class LoggingBackend(object):
    """Console/dllogger logging facade for MaskRCNN runs.

    `@atexit_hook` registers `__atexit__` to run at interpreter exit or on
    SIGTERM/SIGINT; `SingletonMetaClass` makes every instantiation return the
    same instance.
    """
    # Target width of the "===== header =====" separator lines.
    SEP_TARGET_LENGTH = 50
    # ================= Logging Methods ================= #
    # Prefix prepended to every message emitted by the log_* helpers.
    LOGGING_PREFIX = ""
    def __init__(self):
        # super(LoggingBackend, self).__init__()
        # Tracks whether log_runtime() already announced a "Start" (as opposed
        # to a "Restart") for each phase.
        self.runtime_initialized = {"train": False, "evaluation": False}
    # ================= Constructor/Destructor Methods ================= #
    def __atexit__(self):
        """Exit hook: report SUCCESS unless an uncaught traceback exists."""
        is_success = not (hasattr(sys, "last_traceback") and sys.last_traceback is not None)
        print() # Visual spacing
        if is_success:
            self.log_info("Job finished with status: `SUCCESS`")
        else:
            logging.error("Job finished with an uncaught exception: `FAILURE`")
    def log_debug(self, message):
        """Log `message` at DEBUG level with the class prefix."""
        logging.debug("%s%s" % (self.LOGGING_PREFIX, message))
    def log_info(self, message):
        """Log `message` at INFO level with the class prefix."""
        logging.info("%s%s" % (self.LOGGING_PREFIX, message))
    def log_warning(self, message):
        """Log `message` at WARNING level with the class prefix."""
        logging.warning("%s%s" % (self.LOGGING_PREFIX, message))
    def log_error(self, message):
        """Log `message` at ERROR level with the class prefix."""
        logging.error("%s%s" % (self.LOGGING_PREFIX, message))
    def log_critical(self, message):
        """Log `message` at CRITICAL level with the class prefix."""
        logging.critical("%s%s" % (self.LOGGING_PREFIX, message))
    # ================= Automated Logging Methods ================= #
    @staticmethod
    def format_metric_value(value):
        """Render a metric value for printing: scientific notation for floats
        outside [1e-4, 1e4], 5-decimal rounding otherwise, %d for ints, and
        passthrough for anything else."""
        if isinstance(value, ACCEPTED_FLOAT_NUMBER_FORMATS):
            if value < 1e-4 or value > 1e4:
                print_value = "%.4e" % value
            else:
                print_value = "{}".format(round(value, 5))
        elif isinstance(value, ACCEPTED_INT_NUMBER_FORMATS):
            print_value = "%d" % value
        else:
            print_value = value
        return print_value
    # ================= Runtime Logging Method ================= #
    def log_runtime(self, is_train=False):
        """Announce the (re)start of training or evaluation; the first call
        per phase logs "Start", later calls log "Restart"."""
        if is_train:
            if not self.runtime_initialized["train"]:
                self.runtime_initialized["train"] = True
                _message = " Start Training "
            else:
                _message = " Restart Training "
        else:
            if not self.runtime_initialized["evaluation"]:
                self.runtime_initialized["evaluation"] = True
                _message = " Start Evaluation "
            else:
                _message = " Restart Evaluation "
        print() # Visual Spacing
        self.log_info("# ============================================= #")
        self.log_info(_message)
        self.log_info("# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #")
        print() # Visual Spacing
    # ================= Automated Logging Methods ================= #
    def log_git_status(self):
        """Best-effort logging of the current git branch, commit and origin
        URL; silently skipped outside a git repository."""
        git_metadata = dict()
        def get_cmd_result(cmd):
            # Run a shell command and return its stripped stdout.
            return subprocess.check_output(cmd, shell=True).decode("utf-8").strip()
        try:
            git_metadata["branch_name"] = get_cmd_result("git symbolic-ref -q HEAD | cut -d/ -f3-") # current branch
            git_metadata["commit_id"] = get_cmd_result("git rev-parse HEAD") # current commit ID
            git_metadata["remote_url"] = get_cmd_result("git remote get-url origin") # git origin url
            if git_metadata["branch_name"] == "":
                del git_metadata["branch_name"]
        except subprocess.CalledProcessError: # Not a git repository
            pass
        # NOTE(review): dead check — `git_metadata` is created with dict()
        # above and can never be None at this point.
        if git_metadata is None:
            raise ValueError("`git_metadata` value received is `None`")
        self.log_info("===================================== GIT REPOSITORY =====================================")
        for key, value in sorted(git_metadata.items(), key=operator.itemgetter(0)):
            self.log_info("%s: %s" % (key.replace("_", " ").upper(), value))
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
    def log_model_statistics(self, model_statistics=None):
        """Log a dict of model statistics, one line per key, sorted by key."""
        if model_statistics is None:
            raise ValueError("`model_statistics` value received is `None`")
        if not isinstance(model_statistics, dict):
            raise ValueError("`model_statistics` should be a `dict`")
        self.log_info("==================================== MODEL STATISTICS ====================================")
        for key, value in sorted(model_statistics.items(), key=operator.itemgetter(0)):
            self.log_info("%s: %s" % (key, value))
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
    def log_trainable_variables(self, var_list=None):
        """Log (name, shape) pairs of trainable variables, 1-indexed."""
        if var_list is None:
            raise ValueError("`var_list` value received is `None`")
        self.log_info("=================================== TRAINABLE VARIABLES ==================================")
        for idx, (var_name, var_shape) in enumerate(var_list):
            self.log_info(
                "[#{idx:04d}] {name:<60s} => {shape}".format(idx=idx + 1, name=var_name, shape=str(var_shape))
            )
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")
    # ================= Step Logging Method ================= #
    def log_step(self, iteration, throughput, gpu_stats):
        """Log wall-clock timestamp, iteration index and throughput.

        NOTE(review): `gpu_stats` is accepted but never used in this body —
        confirm whether it is still needed by callers.
        """
        # print() # Visual Spacing
        self.log_info("timestamp: %s" % time.time())
        self.log_info("iteration: %d" % int(iteration))
        if throughput is not None:
            self.log_info("throughput: %.1f samples/sec" % float(throughput))
        else:
            self.log_info("throughput: None")
    def log_amp_runtime(self, current_loss_scale, steps_non_skipped, steps_since_last_scale):
        """Log AMP loss-scale statistics when at least one value is known."""
        header_name = " AMP Statistics "
        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
        if current_loss_scale is not None or steps_since_last_scale is not None:
            self.log_info(
                "%s%s%s" % (
                    "=" * reference_len, header_name, "=" *
                    (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
                )
            )
            self.log_info("Steps - Non Skipped: %s" % steps_non_skipped)
            if steps_since_last_scale is not None:
                self.log_info("Steps - Since last loss scale: %s" % steps_since_last_scale)
            if current_loss_scale is not None:
                self.log_info("Loss Scale: %s" % current_loss_scale)
    # ================= Metric Logging Methods ================= #
    def log_metrics(self, metric_data, iteration, runtime_mode):
        """Validate inputs and log each metric in `metric_data`, sorted by
        key, under a " Metrics " header."""
        validate_runtime_mode(runtime_mode)
        if not isinstance(metric_data, dict):
            raise ValueError("`metric_data` should be a dictionary. Received: %s" % type(metric_data))
        if not isinstance(iteration, ACCEPTED_INT_NUMBER_FORMATS):
            raise ValueError("`iteration` should be an integer. Received: %s" % type(iteration))
        header_name = " Metrics "
        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
        self.log_info(
            "%s%s%s" % (
                "=" * reference_len, header_name, "=" *
                (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
            )
        )
        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
            print_value = LoggingBackend.format_metric_value(value)
            self.log_info("%s: %s" % (key, print_value))
    def log_final_metrics(self, metric_data, runtime_mode):
        """Log final metrics to the console and forward them to dllogger."""
        validate_runtime_mode(runtime_mode)
        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
            print_value = LoggingBackend.format_metric_value(value)
            self.log_info("%s: %s" % (key, print_value))
        dllogger.log(step=(), data=metric_data, verbosity=Verbosity.DEFAULT)
    # ================= Summary Logging Method ================= #
    def log_summary(self, is_train, total_steps, total_processing_time, avg_throughput):
        """Log a run-level performance summary and push it to dllogger."""
        if is_train:
            _message = " Training Performance Summary "
        else:
            _message = " Evaluation Performance Summary "
        print() # Visual Spacing
        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
        self.log_info(_message)
        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
        total_processing_hours, rem = divmod(total_processing_time, 3600)
        total_processing_minutes, total_processing_seconds = divmod(rem, 60)
        print() # Visual Spacing
        # NOTE(review): reassembles the duration from int-truncated minute and
        # second parts, so fractional seconds are dropped before reporting.
        total_processing_time = total_processing_hours * 3600 + int(total_processing_minutes) * 60 + int(total_processing_seconds)
        dllogger.log(step=(), data={
            "Average_throughput": avg_throughput,
            "Total processed steps": int(total_steps),
            "Total_processing_time": total_processing_time }, verbosity=Verbosity.DEFAULT)
        self.log_info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
        self.log_info("Total processed steps: {total_steps}".format(total_steps=total_steps))
        self.log_info(
            "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
                hours=total_processing_hours,
                minutes=int(total_processing_minutes),
                seconds=int(total_processing_seconds)
            )
        )
        self.log_info("==================== Metrics ====================")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import inspect
from contextlib import contextmanager
from six import add_metaclass
import threading
import logging as _logging
import warnings
from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
from mask_rcnn.utils.metaclasses import SingletonMetaClass
__all__ = [
"logging",
"log_cleaning"
]
MODEL_NAME = "MaskRCNN"
class StdOutFormatter(_logging.Formatter):
"""
Log formatter used in Tornado. Key features of this formatter are:
* Color support when logging to a terminal that supports it.
* Timestamps on every log line.
* Robust against str/bytes encoding problems.
"""
DEFAULT_FORMAT = '%(color)s[{model_name}] %(levelname)-8s: %(end_color)s%(message)s'.format(
model_name=MODEL_NAME
)
DEFAULT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
    def __init__(self, fmt=None, datefmt=None, style='%'):
        r"""
        :arg string fmt: Log message format.
            It will be applied to the attributes dict of log records. The
            text between ``%(color)s`` and ``%(end_color)s`` will be colored
            depending on the level if color support is on.
        :arg string datefmt: Datetime format.
            Used for formatting ``(asctime)`` placeholder in ``prefix_fmt``.
        :arg string style: Format style forwarded to ``logging.Formatter``.
        .. note:: color support is effectively disabled here: ``self._colors``
            is left empty, so ``format`` substitutes empty strings for
            ``%(color)s`` / ``%(end_color)s``.
        """
        if fmt is None:
            fmt = self.DEFAULT_FORMAT
        if datefmt is None:
            datefmt = self.DEFAULT_DATE_FORMAT
        # _logging.Formatter.__init__(self, datefmt=datefmt)
        super(StdOutFormatter, self).__init__(fmt=fmt, datefmt=datefmt, style=style)
        self._fmt = fmt  # raw format string, used directly in format()
        self._colors = {}  # levelno -> terminal escape code (empty: no color)
        self._normal = ''  # escape code used to reset coloring
def format(self, record):
try:
message = record.getMessage()
assert isinstance(message, str) # guaranteed by logging
# Encoding notes: The logging module prefers to work with character
# strings, but only enforces that log messages are instances of
# basestring. In python 2, non-ascii bytestrings will make
# their way through the logging framework until they blow up with
# an unhelpful decoding error (with this formatter it happens
# when we attach the prefix, but there are other opportunities for
# exceptions further along in the framework).
#
# If a byte string makes it this far, convert it to unicode to
# ensure it will make it out to the logs. Use repr() as a fallback
# to ensure that all byte strings can be converted successfully,
# but don't do it by default so we don't add extra quotes to ascii
# bytestrings. This is a bit of a hacky place to do this, but
# it's worth it since the encoding errors that would otherwise
# result are so useless (and tornado is fond of using utf8-encoded
# byte strings wherever possible).
record.message = self.to_unicode(message)
except Exception as e:
record.message = "Bad message (%r): %r" % (e, record.__dict__)
record.asctime = self.formatTime(record, self.datefmt)
if record.levelno in self._colors:
record.color = self._colors[record.levelno]
record.end_color = self._normal
else:
record.color = record.end_color = ''
formatted = self._fmt % record.__dict__
if record.exc_info:
if not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
if record.exc_text:
# exc_text contains multiple lines. We need to _safe_unicode
# each line separately so that non-utf8 bytes don't cause
# all the newlines to turn into '\n'.
lines = [formatted.rstrip()]
lines.extend(self.to_unicode(ln) for ln in record.exc_text.split('\n'))
formatted = '\n'.join(lines)
return formatted.replace("\n", "\n ")
@staticmethod
def to_unicode(value):
"""
Converts a string argument to a unicode string.
If the argument is already a unicode string or None, it is returned
unchanged. Otherwise it must be a byte string and is decoded as utf8.
"""
try:
if isinstance(value, (str, type(None))):
return value
if not isinstance(value, bytes):
raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
return value.decode("utf-8")
except UnicodeDecodeError:
return repr(value)
@add_metaclass(SingletonMetaClass)
class _Logger(object):
    """Process-wide logging facade around a stdlib logger named MODEL_NAME.

    Only MPI rank 0 creates the underlying logger and stream handlers; on all
    other ranks ``self._logger`` stays None and every logging method is a
    silent no-op.
    """

    # Standard logging levels re-exported for convenience.
    # Level 0
    NOTSET = _logging.NOTSET
    # Level 10
    DEBUG = _logging.DEBUG
    # Level 20
    INFO = _logging.INFO
    # Level 30
    WARNING = _logging.WARNING
    # Level 40
    ERROR = _logging.ERROR
    # Level 50
    CRITICAL = _logging.CRITICAL

    # Numeric level -> human-readable name (mirrors the stdlib defaults).
    _level_names = {
        0: 'NOTSET',
        10: 'DEBUG',
        20: 'INFO',
        30: 'WARNING',
        40: 'ERROR',
        50: 'CRITICAL',
    }

    def __init__(self, capture_io=True):
        # NOTE(review): `capture_io` is accepted but never read - presumably a
        # leftover; kept for interface compatibility.
        self._logger = None
        # Guards logger creation in _define_logger().
        self._logger_lock = threading.Lock()
        # Maps handler name ("stream_stdout"/"stream_stderr") -> StreamHandler.
        self._handlers = dict()
        # Backup of warnings.showwarning while captureWarnings(True) is active.
        self.old_warnings_showwarning = None
        # Only rank 0 logs in distributed runs; other ranks stay silent.
        if MPI_rank_and_size()[0] == 0:
            self._define_logger()

    def _define_logger(self):
        """Create the underlying logger, attach handlers, set default level."""
        # Use double-checked locking to avoid taking lock unnecessarily.
        # NOTE(review): the None-check is only done before acquiring the lock,
        # not re-checked inside it - benign here since creation happens once
        # from __init__, but not true double-checked locking.
        if self._logger is not None:
            return self._logger
        with self._logger_lock:
            try:
                # Scope the TensorFlow logger to not conflict with users' loggers.
                self._logger = _logging.getLogger(MODEL_NAME)
                self.reset_stream_handler()
            finally:
                # Always end up with a sane default verbosity.
                self.set_verbosity(verbosity_level=_Logger.INFO)
            # Keep records out of the root logger to avoid double printing.
            self._logger.propagate = False

    def reset_stream_handler(self):
        """(Re)install the stdout/stderr handlers: records at INFO and below
        go to stdout, records above INFO go to stderr."""
        if self._logger is None:
            raise RuntimeError("Impossible to set handlers if the Logger is not predefined")
        # ======== Remove Handler if already existing ========
        try:
            self._logger.removeHandler(self._handlers["stream_stdout"])
        except KeyError:
            pass
        try:
            self._logger.removeHandler(self._handlers["stream_stderr"])
        except KeyError:
            pass
        # ================= Streaming Handler =================
        # Add the output handler.
        self._handlers["stream_stdout"] = _logging.StreamHandler(sys.stdout)
        self._handlers["stream_stdout"].addFilter(lambda record: record.levelno <= _logging.INFO)
        self._handlers["stream_stderr"] = _logging.StreamHandler(sys.stderr)
        self._handlers["stream_stderr"].addFilter(lambda record: record.levelno > _logging.INFO)
        Formatter = StdOutFormatter
        self._handlers["stream_stdout"].setFormatter(Formatter())
        self._logger.addHandler(self._handlers["stream_stdout"])
        # NOTE(review): this KeyError guard is unreachable - the
        # "stream_stderr" key is assigned a few lines above.
        try:
            self._handlers["stream_stderr"].setFormatter(Formatter())
            self._logger.addHandler(self._handlers["stream_stderr"])
        except KeyError:
            pass

    def get_verbosity(self):
        """Return how much logging output will be produced."""
        if self._logger is not None:
            return self._logger.getEffectiveLevel()

    def set_verbosity(self, verbosity_level):
        """Sets the threshold for what messages will be logged."""
        if self._logger is not None:
            self._logger.setLevel(verbosity_level)
            # Keep the handlers' thresholds in sync with the logger.
            for handler in self._logger.handlers:
                handler.setLevel(verbosity_level)

    @contextmanager
    def temp_verbosity(self, verbosity_level):
        """Sets the a temporary threshold for what messages will be logged."""
        if self._logger is not None:
            old_verbosity = self.get_verbosity()
            try:
                self.set_verbosity(verbosity_level)
                yield
            finally:
                # Restore the previous level even if the body raised.
                self.set_verbosity(old_verbosity)
        else:
            # Non-rank-0 process: nothing to adjust, just run the body.
            try:
                yield
            finally:
                pass

    def captureWarnings(self, capture):
        """
        If capture is true, redirect all warnings to the logging package.
        If capture is False, ensure that warnings are not redirected to logging
        but to their original destinations.
        """
        if self._logger is not None:
            if capture and self.old_warnings_showwarning is None:
                self.old_warnings_showwarning = warnings.showwarning  # Backup Method
                warnings.showwarning = self._showwarning
            elif not capture and self.old_warnings_showwarning is not None:
                warnings.showwarning = self.old_warnings_showwarning  # Restore Method
                self.old_warnings_showwarning = None

    def _showwarning(self, message, category, filename, lineno, file=None, line=None):
        """
        Implementation of showwarnings which redirects to logging.
        It will call warnings.formatwarning and will log the resulting string
        with level logging.WARNING.
        """
        s = warnings.formatwarning(message, category, filename, lineno, line)
        self.warning("%s", s)

    def debug(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'DEBUG'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.debug("Houston, we have a %s", "thorny problem", exc_info=1)
        """
        # NOTE(review): calls the private Logger._log (stdlib internal) rather
        # than the public .debug() - relies on CPython logging internals.
        if self._logger is not None and self._logger.isEnabledFor(_Logger.DEBUG):
            self._logger._log(_Logger.DEBUG, msg, args, **kwargs)

    def info(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'INFO'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.info("Houston, we have a %s", "interesting problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.INFO):
            self._logger._log(_Logger.INFO, msg, args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'WARNING'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.WARNING):
            self._logger._log(_Logger.WARNING, msg, args, **kwargs)

    def error(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'ERROR'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.error("Houston, we have a %s", "major problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.ERROR):
            self._logger._log(_Logger.ERROR, msg, args, **kwargs)

    def critical(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'CRITICAL'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.critical("Houston, we have a %s", "major disaster", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.CRITICAL):
            self._logger._log(_Logger.CRITICAL, msg, args, **kwargs)
def log_cleaning(hide_deprecation_warnings=False):
    """Quiet down TensorFlow/Python warning noise and reformat root handlers.

    Args:
        hide_deprecation_warnings: when True, suppress Python warnings and
            TensorFlow deprecation messages entirely.
    """
    if hide_deprecation_warnings:
        warnings.simplefilter("ignore")
        # These private TF modules move/disappear across versions
        # (`deprecation_wrapper` does not exist in TF 2.x), so guard each
        # import separately and degrade gracefully instead of crashing.
        try:
            from tensorflow.python.util import deprecation
            deprecation._PRINT_DEPRECATION_WARNINGS = False
        except ImportError:
            pass
        try:
            from tensorflow.python.util import deprecation_wrapper
            deprecation_wrapper._PER_MODULE_WARNING_LIMIT = 0
        except ImportError:
            pass
    formatter = _logging.Formatter('[%(levelname)s] %(message)s')
    # Stop TF's logger and the root logger from double-printing records.
    from tensorflow.python.platform import tf_logging
    tf_logging.get_logger().propagate = False
    _logging.getLogger().propagate = False
    for handler in _logging.getLogger().handlers:
        handler.setFormatter(formatter)
# Necessary to catch the correct caller: point logging's internal frame-skip
# (`_srcfile`) at THIS module's file.
# BUGFIX: the original used `inspect.getfile(_Logger.__class__)`, but
# `_Logger.__class__` is SingletonMetaClass, so it resolved to the metaclass
# module's file instead of this one.
_logging._srcfile = os.path.normcase(inspect.getfile(_Logger))
# Module-level singleton logging facade (handlers only exist on MPI rank 0).
logging = _Logger()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Public API: only the singleton metaclass is exported from this module.
__all__ = [
    "SingletonMetaClass",
]
class SingletonMetaClass(type):
    """Metaclass enforcing the singleton pattern.

    The first instantiation of a class using this metaclass is cached; every
    later call returns that same cached instance (later constructor arguments
    are ignored).
    """

    # Shared cache: class object -> its unique instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super(SingletonMetaClass, cls).__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta
from abc import abstractmethod
import six
import collections
from functools import lru_cache
import numpy as np
__all__ = ["MetricMeter", "StandardMeter", "AverageMeter", "MovingAverageMeter", "MemoryLessMovingAverageMeter"]
# Supported Numpy DTypes: `np.sctypes`
ACCEPTED_INT_NUMBER_FORMATS = (
int,
np.uint8,
np.uint16,
np.uint32,
np.uint64,
np.int,
np.int8,
np.int16,
np.int32,
np.int64,
)
ACCEPTED_FLOAT_NUMBER_FORMATS = (
float,
np.float,
np.float16,
np.float32,
np.float64,
np.float128,
)
ACCEPTED_STR_NUMBER_FORMATS = (
str,
np.str,
)
ACCEPTED_NUMBER_FORMATS = \
ACCEPTED_INT_NUMBER_FORMATS + \
ACCEPTED_FLOAT_NUMBER_FORMATS + \
ACCEPTED_STR_NUMBER_FORMATS
class AbstractMeterMixin(metaclass=ABCMeta):
    """Abstract base for all meters.

    Concrete meter classes must expose ``AUTHORIZED_DTYPES``, the collection
    of scalar types they accept (``MetricMeter`` satisfies this with a class
    attribute).

    NOTE: the third-party ``six.add_metaclass`` shim was replaced with the
    native ``metaclass=`` keyword - this codebase is Python-3-only (it uses
    f-strings elsewhere), so the compatibility layer added nothing.
    """

    @abstractmethod
    def AUTHORIZED_DTYPES(self):
        pass
class MetricMeter(AbstractMeterMixin, metaclass=ABCMeta):
    """Base meter: accumulates scalar values and lets subclasses define how
    the accumulated values are reduced in ``read()``.

    NOTE: the original decorated ``__init__`` and ``__str__`` with
    ``functools.lru_cache``, which caches on ``self`` and therefore keeps every
    instance alive for the process lifetime (ruff B019) while providing no
    benefit - both decorators were removed. The ``six.add_metaclass`` shim was
    replaced with the native ``metaclass=`` keyword (Python-3-only codebase).
    """

    # Supported Numpy DTypes: `np.sctypes`
    AUTHORIZED_DTYPES = tuple(ACCEPTED_NUMBER_FORMATS)

    def __init__(self):
        # Recorded values, ordered oldest -> newest.
        self._values = np.array([])

    def reset(self):
        """Discard every recorded value."""
        self._values = np.array([])

    def __str__(self):
        return self.__class__.__name__

    def get_last(self):
        """Return the most recently recorded value.

        Raises:
            ValueError: if nothing has been recorded yet.
        """
        try:
            return self._values[-1]
        except IndexError:
            raise ValueError("Impossible to get the last value. No value has been recorded yet")

    def record(self, val):
        """Validate ``val`` and append it to the recorded values.

        Raises:
            TypeError: if ``val`` is not one of AUTHORIZED_DTYPES.
            ValueError: if ``val`` is NaN or infinite.
        """
        if not isinstance(val, MetricMeter.AUTHORIZED_DTYPES):
            raise TypeError("Unsupported datatype received: %s" % str(type(val)))
        if np.isnan(val) or np.isinf(val):
            raise ValueError("invalid value received: %s" % str(val))
        self._values = np.append(self._values, val)

    @abstractmethod
    def read(self):
        """Reduce the recorded values to a single reported value."""
        raise NotImplementedError()
class StandardMeter(MetricMeter):
    """Meter whose ``read()`` simply reports the last recorded value."""

    def read(self):
        last_recorded = self.get_last()
        return last_recorded
class AverageMeter(MetricMeter):
    """Meter whose ``read()`` reports the arithmetic mean of all recorded values."""

    def read(self):
        if not len(self._values):
            raise ValueError("NaN Result, Impossible to compute the average of an empty list")
        return np.mean(self._values)
class MovingAverageMeter(MetricMeter):
    """Meter reporting the mean of the last ``window_size`` recorded values.

    All recorded values are retained; only the tail of length ``window_size``
    is averaged in ``read()``.
    """

    def __init__(self, window_size):
        """
        Args:
            window_size: positive int, number of trailing values to average.

        Raises:
            ValueError: if ``window_size`` is not an int or is < 1.
        """
        super(MovingAverageMeter, self).__init__()
        if not isinstance(window_size, int):
            raise ValueError("`window_size` must be an integer")
        if window_size < 1:
            raise ValueError("`window_size` must be superior or equal to 1")
        self._window_size = window_size

    def __str__(self):
        # BUGFIX: the original wrapped this in functools.lru_cache, which
        # caches on `self` and keeps every instance alive forever (ruff B019);
        # the decorator was removed.
        return "%s(window_size=%d)" % (super(MovingAverageMeter, self).__str__(), self._window_size)

    def read(self):
        """Return the mean of the most recent ``window_size`` values.

        Raises:
            ValueError: if no value has been recorded yet.
        """
        if len(self._values):
            return np.mean(self._values[-self._window_size:])
        else:
            raise ValueError("NaN Result, Impossible to compute the moving average of an empty list")
class MemoryLessMovingAverageMeter(MetricMeter):
    """Moving-average meter that only ever stores the last ``window_size``
    values (bounded memory), unlike ``MovingAverageMeter`` which keeps all.
    """

    def __init__(self, window_size):
        """
        Args:
            window_size: positive int, capacity of the value window.

        Raises:
            ValueError: if ``window_size`` is not an int or is < 1.
        """
        super(MemoryLessMovingAverageMeter, self).__init__()
        # BUGFIX: validate BEFORE building the deque - the original built the
        # deque first, so a non-int window_size raised TypeError from deque()
        # instead of the intended ValueError below.
        if not isinstance(window_size, int):
            raise ValueError("`window_size` must be an integer")
        if window_size < 1:
            raise ValueError("`window_size` must be superior or equal to 1")
        self._window_size = window_size
        self._values = collections.deque(maxlen=window_size)

    def reset(self):
        """Discard every recorded value (window capacity is kept)."""
        self._values = collections.deque(maxlen=self._window_size)

    def record(self, val):
        """Validate ``val`` and append it, evicting the oldest value once the
        window is full.

        BUGFIX: the inherited ``record()`` used ``np.append``, which converts
        the deque into an ndarray on the first call and silently drops the
        ``maxlen`` bound - the meter then behaved exactly like AverageMeter.
        This override appends to the deque directly.

        Raises:
            TypeError: if ``val`` is not one of AUTHORIZED_DTYPES.
            ValueError: if ``val`` is NaN or infinite.
        """
        if not isinstance(val, MetricMeter.AUTHORIZED_DTYPES):
            raise TypeError("Unsupported datatype received: %s" % str(type(val)))
        if np.isnan(val) or np.isinf(val):
            raise ValueError("invalid value received: %s" % str(val))
        self._values.append(val)

    def __str__(self):
        # BUGFIX: removed the functools.lru_cache decorator (ruff B019 - it
        # keeps every instance alive for the process lifetime).
        return "%s(window_size=%d)" % (super(MemoryLessMovingAverageMeter, self).__str__(), self._window_size)

    def read(self):
        """Return the mean of the values currently in the window.

        Raises:
            ValueError: if no value has been recorded yet.
        """
        if len(self._values):
            return np.mean(self._values)
        else:
            raise ValueError("NaN Result, Impossible to compute the moving average of an empty list")
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import weakref
from mask_rcnn.utils.logging_backend import DistributedStrategy
from mask_rcnn.utils.logging_backend import LoggingScope
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils import meters
# Public API of the metric registry module.
__all__ = ["TF_METRICS", "KERAS_MODELS", "KERAS_OPTIMIZERS", "register_metric", "clear_registered_metrics"]
class WeakRefList(object):
    """List-like container holding weak references to its items.

    Items that have been garbage-collected are skipped during iteration and
    pruned from the internal list, so ``len()`` and iteration only ever
    reflect live objects. Appending does not keep the item alive.
    """

    def __init__(self):
        self._items = list()

    def _clean_iternal_list(self):
        # Prune references whose target has already been garbage-collected.
        live_refs = [ref for ref in self._items if ref() is not None]
        self._items = live_refs

    def __iter__(self):
        self._clean_iternal_list()
        for ref in self._items:
            target = ref()
            if target is None:
                continue
            yield target

    def __len__(self):
        self._clean_iternal_list()
        return len(self._items)

    def clear(self):
        """Drop every stored reference."""
        self._items.clear()

    def append(self, new_item):
        """Store a weak reference to ``new_item`` and prune dead entries."""
        self._items.append(weakref.ref(new_item))
        self._clean_iternal_list()
# Global registries.
# TF_METRICS maps metric name -> dict with keys
# "tensor" / "aggregator" / "distributed_strategy" / "scope" (see register_metric).
TF_METRICS = dict()
# Weak-reference lists so registration never keeps models/optimizers alive.
KERAS_MODELS = WeakRefList()
KERAS_OPTIMIZERS = WeakRefList()
def register_metric(
    name,
    tensor,
    aggregator=None,
    metric_scope=LoggingScope.ITER,
    distributed_strategy=DistributedStrategy.NONE
):
    """Register ``tensor`` as a logged metric under ``name``.

    Args:
        name: unique metric name; duplicates raise ValueError.
        tensor: the tensor whose value is recorded for this metric.
        aggregator: a `meters` instance used to aggregate recorded values.
            Defaults to a fresh `meters.StandardMeter()` per call.
        metric_scope: one of ``LoggingScope.__values__()``.
        distributed_strategy: one of ``DistributedStrategy.__values__()``.

    Raises:
        ValueError: on duplicate name, or unknown aggregator/scope/strategy.
    """
    # BUGFIX: the default used to be the shared instance
    # `aggregator=meters.StandardMeter()` (mutable default argument), so every
    # metric registered without an explicit aggregator accumulated its values
    # into the SAME meter object. Build a fresh meter per call instead.
    if aggregator is None:
        aggregator = meters.StandardMeter()
    if name in TF_METRICS.keys():
        raise ValueError("A metric with the name `%s` has already been registered" % name)
    if not issubclass(aggregator.__class__, meters.AbstractMeterMixin):
        raise ValueError("Unknown `aggregator` received: %s" % aggregator.__class__.__name__)
    if metric_scope not in LoggingScope.__values__():
        raise ValueError(
            "Unknown `metric_scope` received: %s, authorized: %s" %
            (metric_scope, LoggingScope.__values__())
        )
    if distributed_strategy not in DistributedStrategy.__values__():
        raise ValueError(
            "Unknown `distributed_strategy` received: %s, authorized: %s" %
            (distributed_strategy, DistributedStrategy.__values__())
        )
    TF_METRICS[name] = {
        "tensor": tensor,
        "aggregator": aggregator,
        "distributed_strategy": distributed_strategy,
        "scope": metric_scope,
    }
    logging.debug(
        "New Metric Registered: `{metric_name}`, Aggregator: {aggregator}, "
        "Scope: {scope}, Distributed Strategy: {distributed_strategy}".format(
            metric_name=name, aggregator=str(aggregator), distributed_strategy=distributed_strategy, scope=metric_scope
        )
    )
def clear_registered_metrics():
    """Remove every metric previously added via `register_metric`."""
    TF_METRICS.clear()
    logging.debug("All registered metrics have been cleared")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Training script for Mask-RCNN."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
# Set before the tensorflow import below so the C++ log level applies
# ('3' hides everything up to errors).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
# Silence per-module C++ VLOG output for the custom ops used by this model.
os.environ["TF_CPP_VMODULE"] = 'non_max_suppression_op=0,generate_box_proposals_op=0,executor=0'
# os.environ["TF_XLA_FLAGS"] = 'tf_xla_print_cluster_outputs=1'
from absl import app
import tensorflow as tf
from tensorflow.python.framework.ops import disable_eager_execution
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn import dataloader
from mask_rcnn import distributed_executer
from mask_rcnn import mask_rcnn_model
from mask_rcnn.hyperparameters import mask_rcnn_params
from mask_rcnn.hyperparameters import params_io
from mask_rcnn.hyperparameters.cmdline_utils import define_hparams_flags
from mask_rcnn.utils.logging_formatter import log_cleaning
import dllogger
# Flags are registered at import time and parsed later by app.run(main).
FLAGS = define_hparams_flags()
def run_executer(runtime_config, train_input_fn=None, eval_input_fn=None):
    """Runs Mask RCNN model on distribution strategy defined by the user."""
    # Select the executer implementation matching the distribution setup.
    if runtime_config.use_tf_distributed:
        executer_cls = distributed_executer.TFDistributedExecuter
    else:
        executer_cls = distributed_executer.EstimatorExecuter
    executer = executer_cls(runtime_config, mask_rcnn_model.mask_rcnn_model_fn)

    mode = runtime_config.mode
    if mode == 'train':
        executer.train(
            train_input_fn=train_input_fn,
            run_eval_after_train=FLAGS.eval_after_training,
            eval_input_fn=eval_input_fn
        )
    elif mode == 'eval':
        executer.eval(eval_input_fn=eval_input_fn)
    elif mode == 'train_and_eval':
        executer.train_and_eval(train_input_fn=train_input_fn, eval_input_fn=eval_input_fn)
    else:
        raise ValueError('Mode must be one of `train`, `eval`, or `train_and_eval`')
def main(argv):
    """Build the run config from CLI flags, validate it, and launch the job."""
    del argv  # Unused.

    # ============================ Configure parameters ============================ #
    RUN_CONFIG = mask_rcnn_params.default_config()

    temp_config = FLAGS.flag_values_dict()
    temp_config['learning_rate_decay_levels'] = [float(decay) for decay in temp_config['learning_rate_decay_levels']]
    temp_config['learning_rate_levels'] = [
        decay * temp_config['init_learning_rate'] for decay in temp_config['learning_rate_decay_levels']
    ]
    temp_config['learning_rate_steps'] = [int(step) for step in temp_config['learning_rate_steps']]

    RUN_CONFIG = params_io.override_hparams(RUN_CONFIG, temp_config)
    # ============================ Configure parameters ============================ #

    if RUN_CONFIG.use_tf_distributed and MPI_is_distributed():
        raise RuntimeError("Incompatible Runtime. Impossible to use `--use_tf_distributed` with MPIRun Horovod")

    # BUGFIX: this check used to test `eval_samples`, contradicting its own
    # error message - training requires `training_file_pattern`.
    if RUN_CONFIG.mode in ('train', 'train_and_eval') and not RUN_CONFIG.training_file_pattern:
        raise RuntimeError('You must specify `training_file_pattern` for training.')

    if RUN_CONFIG.mode in ('eval', 'train_and_eval'):
        if not RUN_CONFIG.validation_file_pattern:
            raise RuntimeError('You must specify `validation_file_pattern` for evaluation.')
        if RUN_CONFIG.val_json_file == "" and not RUN_CONFIG.include_groundtruth_in_features:
            raise RuntimeError(
                'You must specify `val_json_file` or include_groundtruth_in_features=True for evaluation.')
        if not RUN_CONFIG.include_groundtruth_in_features and not os.path.isfile(RUN_CONFIG.val_json_file):
            raise FileNotFoundError("Validation JSON File not found: %s" % RUN_CONFIG.val_json_file)

    dllogger.init(backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                                       filename=RUN_CONFIG.log_path)])

    if RUN_CONFIG.mode in ('train', 'train_and_eval'):
        train_input_fn = dataloader.InputReader(
            file_pattern=RUN_CONFIG.training_file_pattern,
            mode=tf.estimator.ModeKeys.TRAIN,
            num_examples=None,
            use_fake_data=RUN_CONFIG.use_fake_data,
            use_instance_mask=RUN_CONFIG.include_mask,
            seed=RUN_CONFIG.seed
        )
    else:
        train_input_fn = None

    # BUGFIX: the original condition was
    #   RUN_CONFIG.mode in ('eval', 'train_and_eval' or (...))
    # where the `or` collapsed INSIDE the tuple literal, so the
    # "train + eval_after_training" case could never enable the eval reader.
    if (RUN_CONFIG.mode in ('eval', 'train_and_eval')
            or (RUN_CONFIG.mode == 'train' and RUN_CONFIG.eval_after_training)):
        eval_input_fn = dataloader.InputReader(
            file_pattern=RUN_CONFIG.validation_file_pattern,
            mode=tf.estimator.ModeKeys.PREDICT,
            num_examples=RUN_CONFIG.eval_samples,
            use_fake_data=False,
            use_instance_mask=RUN_CONFIG.include_mask,
            seed=RUN_CONFIG.seed
        )
    else:
        eval_input_fn = None

    run_executer(RUN_CONFIG, train_input_fn, eval_input_fn)
if __name__ == '__main__':
    # NOTE(review): set_verbosity is called twice; the DEBUG call below
    # overrides this INFO call, which is effectively dead - confirm which
    # level is actually intended.
    logging.set_verbosity(logging.INFO)
    # Estimator-based pipeline runs in graph mode.
    disable_eager_execution()
    logging.set_verbosity(logging.DEBUG)
    # Silence AutoGraph conversion chatter.
    tf.autograph.set_verbosity(0)
    log_cleaning(hide_deprecation_warnings=True)
    app.run(main)
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running evaluation benchmark """
import argparse
import os
import shutil
import subprocess
def main():
    """Parse CLI flags, assemble the Mask R-CNN evaluation command and run it."""
    # -------- CLI flags --------
    parser = argparse.ArgumentParser(description="MaskRCNN evaluation benchmark")
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--amp', action='store_true')
    parser.add_argument('--data_dir', type=str, default='/data')
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    parser.add_argument('--weights_dir', type=str, default='/model')
    args = parser.parse_args()

    script_dir = os.path.dirname(__file__)
    main_path = os.path.abspath(os.path.join(script_dir, '../mask_rcnn_main.py'))

    checkpoint = os.path.join(args.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")
    val_pattern = os.path.join(args.data_dir, "val*.tfrecord")
    val_json = os.path.join(args.data_dir, "annotations/instances_val2017.json")

    # -------- build command --------
    cmd = ''.join([
        f'python {main_path}',
        ' --mode eval',
        f' --model_dir "{args.model_dir}"',
        f' --checkpoint "{checkpoint}"',
        f' --validation_file_pattern "{val_pattern}"',
        f' --val_json_file "{val_json}"',
        ' --num_steps_per_eval 200',
        ' --eval_samples 1200',
        ' --use_batched_nms',
        ' --nouse_custom_box_proposals_op',
        ' --xla',
        f' --eval_batch_size {args.batch_size}',
        ' --amp' if args.amp else ' ',
    ])

    # -------- print command --------
    separator = '-' * shutil.get_terminal_size()[0]
    print(separator, cmd, separator, sep='\n')

    # -------- run model --------
    # NOTE(review): shell=True with an interpolated string; inputs come from
    # local CLI flags here, but keep that in mind if paths become untrusted.
    subprocess.call(cmd, shell=True)
if __name__ == '__main__':
    main()
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running training benchmark """
import argparse
import os
import shutil
import subprocess
def main():
    """Parse CLI flags, assemble the Mask R-CNN training command and run it."""
    # -------- CLI flags --------
    parser = argparse.ArgumentParser(description="MaskRCNN train benchmark")
    parser.add_argument('--gpus', type=int, required=True)
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--amp', action='store_true')
    parser.add_argument('--data_dir', type=str, default='/data')
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    parser.add_argument('--weights_dir', type=str, default='/model')
    args = parser.parse_args()

    script_dir = os.path.dirname(__file__)
    main_path = os.path.abspath(os.path.join(script_dir, '../mask_rcnn_main.py'))

    checkpoint = os.path.join(args.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")
    train_pattern = os.path.join(args.data_dir, "train*.tfrecord")

    # -------- build command --------
    cmd = ''.join([
        f'horovodrun -np {args.gpus} ',
        f'python {main_path}',
        ' --mode train',
        f' --model_dir "{args.model_dir}"',
        f' --checkpoint "{checkpoint}"',
        f' --training_file_pattern "{train_pattern}"',
        ' --init_learning_rate 0.04',
        ' --total_steps 200',
        ' --use_batched_nms',
        ' --noeval_after_training',
        ' --nouse_custom_box_proposals_op',
        ' --xla',
        f' --train_batch_size {args.batch_size}',
        ' --amp' if args.amp else ' ',
    ])

    # -------- print command --------
    separator = '-' * shutil.get_terminal_size()[0]
    print(separator, cmd, separator, sep='\n')

    # -------- run model --------
    # NOTE(review): shell=True with an interpolated string; inputs come from
    # local CLI flags here, but keep that in mind if paths become untrusted.
    subprocess.call(cmd, shell=True)
if __name__ == '__main__':
    main()
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Coordinates of the NVIDIA TF 1.x base image used for the build.
CONTAINER_TF1x_BASE="nvcr.io/nvidia/tensorflow"
CONTAINER_TF1x_TAG="20.06-tf1-py3"
# ======================== Refresh base image ======================== #
docker pull "${CONTAINER_TF1x_BASE}:${CONTAINER_TF1x_TAG}"
# ========================== Build container ========================= #
echo -e "\n\nBuilding NVIDIA TF 1.x Container\n\n"
sleep 1
# NOTE(review): FROM_IMAGE_NAME hard-codes the same image that BASE/TAG
# describe - confirm whether it should reuse the variables instead.
docker build -t joc_tensorflow_maskrcnn:tf1.x-py3 \
--build-arg BASE_CONTAINER="${CONTAINER_TF1x_BASE}" \
--build-arg IMG_TAG="${CONTAINER_TF1x_TAG}" \
--build-arg FROM_IMAGE_NAME="nvcr.io/nvidia/tensorflow:20.06-tf1-py3" \
.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment