Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
c320b6ef
Commit
c320b6ef
authored
Apr 15, 2022
by
zhenyi
Browse files
tf2 detection
parent
0fc002df
Changes
195
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3851 additions
and
0 deletions
+3851
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/spatial_transform_ops.py
...Detection/MaskRCNN/mask_rcnn/ops/spatial_transform_ops.py
+348
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/training_ops.py
...teVision/Detection/MaskRCNN/mask_rcnn/ops/training_ops.py
+356
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/__init__.py
...eVision/Detection/MaskRCNN/mask_rcnn/training/__init__.py
+0
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/learning_rates.py
...n/Detection/MaskRCNN/mask_rcnn/training/learning_rates.py
+55
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/losses.py
...uteVision/Detection/MaskRCNN/mask_rcnn/training/losses.py
+439
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/__init__.py
...puteVision/Detection/MaskRCNN/mask_rcnn/utils/__init__.py
+0
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/box_utils.py
...uteVision/Detection/MaskRCNN/mask_rcnn/utils/box_utils.py
+588
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/coco_utils.py
...teVision/Detection/MaskRCNN/mask_rcnn/utils/coco_utils.py
+256
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/decorators.py
...teVision/Detection/MaskRCNN/mask_rcnn/utils/decorators.py
+232
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/distributed_utils.py
...n/Detection/MaskRCNN/mask_rcnn/utils/distributed_utils.py
+100
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/lazy_imports.py
...Vision/Detection/MaskRCNN/mask_rcnn/utils/lazy_imports.py
+134
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/logging_backend.py
...ion/Detection/MaskRCNN/mask_rcnn/utils/logging_backend.py
+323
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/logging_formatter.py
...n/Detection/MaskRCNN/mask_rcnn/utils/logging_formatter.py
+398
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/metaclasses.py
...eVision/Detection/MaskRCNN/mask_rcnn/utils/metaclasses.py
+32
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/meters.py
...omputeVision/Detection/MaskRCNN/mask_rcnn/utils/meters.py
+178
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/metric_tracking.py
...ion/Detection/MaskRCNN/mask_rcnn/utils/metric_tracking.py
+106
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn_main.py
...Flow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn_main.py
+151
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/benchmark_inference.py
...eVision/Detection/MaskRCNN/scripts/benchmark_inference.py
+60
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/benchmark_training.py
...teVision/Detection/MaskRCNN/scripts/benchmark_training.py
+62
-0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/docker/build_tf1.sh
...puteVision/Detection/MaskRCNN/scripts/docker/build_tf1.sh
+33
-0
No files found.
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/spatial_transform_ops.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
def nearest_upsampling(data, scale):
    """Nearest neighbor upsampling implementation.

    Args:
        data: A tensor with a shape of [batch, height_in, width_in, channels].
        scale: An integer multiple to scale resolution of input data.

    Returns:
        data_up: A tensor with a shape of
            [batch, height_in*scale, width_in*scale, channels]. Same dtype as
            input data.
    """
    with tf.name_scope('nearest_upsampling'):
        bs, h, w, c = tf.unstack(tf.shape(data))

        # Use stacking plus a reshape to quickly upsample the input: each
        # pixel is repeated `scale` times along both spatial axes, and the
        # repeated axes are folded into height/width by the final reshape.
        # The equivalent broadcast form would be:
        #   data = tf.reshape(data, [bs, h, 1, w, 1, c]) * \
        #          tf.ones([1, 1, scale, 1, scale, 1], dtype=data.dtype)
        # Stacking is used instead of broadcasting with a 6-d tensor for
        # TfLite compatibility.
        output = tf.stack([data] * scale, axis=3)
        output = tf.stack([output] * scale, axis=2)
        return tf.reshape(output, [bs, h * scale, w * scale, c])
        # NOTE: the original code had a second, unreachable
        # `return tf.reshape(data, ...)` after the unconditional return above;
        # that dead statement has been removed.
def selective_crop_and_resize(features,
                              boxes,
                              box_levels,
                              boundaries,
                              output_size=7,
                              is_gpu_inference=False):
    """Crop and resize boxes on a set of feature maps.

    Given multiple features maps indexed by different levels, and a set of boxes
    where each box is mapped to a certain level, it selectively crops and
    resizes boxes from the corresponding feature maps to generate the box
    features.

    We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
    figure 3 for reference). Specifically, for each feature map, we select an
    (output_size, output_size) set of pixels corresponding to the box location,
    and then use bilinear interpolation to select the feature value for each
    pixel.

    For performance, we perform the gather and interpolation on all layers as a
    single operation. To do this, the multi-level features are first stacked and
    gathered into [2*output_size, 2*output_size] feature points. Then bilinear
    interpolation is performed on the gathered feature points to generate
    [output_size, output_size] RoIAlign feature map.

    Here is the step-by-step algorithm:
    1. The multi-level features are gathered into a
       [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
       Tensor. The Tensor contains four neighboring feature points for each
       vertice in the output grid.
    2. Compute the interpolation kernel of shape
       [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axis
       can be seen as stacking 2x2 interpolation kernels for all vertices in
       the output grid.
    3. Element-wise multiply the gathered features and interpolation kernel.
       Then apply 2x2 average pooling to reduce spatial dimension to
       output_size.

    Args:
        features: a 5-D tensor of shape
            [batch_size, num_levels, max_height, max_width, num_filters] where
            cropping and resizing are based.
        boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
            information of each box w.r.t. the corresponding feature map.
            boxes[:, :, 0:2] are the grid position in (y, x) (float) of the
            top-left corner of each box. boxes[:, :, 2:4] are the box sizes in
            (h, w) (float) in terms of the number of pixels of the
            corresponding feature map size.
        box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1]
            representing the 0-based corresponding feature level index of each
            box.
        boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2]
            representing the boundary (in (y, x)) of the corresponding feature
            map for each box. Any resampled grid points that go beyond the
            boundary will be clipped.
        output_size: a scalar indicating the output crop size.
        is_gpu_inference: whether to build the model for GPU inference.

    Returns:
        features_per_box: a 5-D tensor of shape
            [batch_size, num_boxes, output_size, output_size, num_filters]
            representing the cropped features.
    """
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    _, num_boxes, _ = boxes.get_shape().as_list()

    # Compute the grid position w.r.t. the corresponding feature map.
    # Sample points are placed at the centers of the output_size sub-cells
    # of each box (hence the `i + 0.5` offset).
    box_grid_x = []
    box_grid_y = []
    for i in range(output_size):
        box_grid_x.append(
            boxes[:, :, 1:2] + (i + 0.5) * boxes[:, :, 3:4] / output_size)
        box_grid_y.append(
            boxes[:, :, 0:1] + (i + 0.5) * boxes[:, :, 2:3] / output_size)
    box_grid_x = tf.concat(box_grid_x, axis=-1)
    box_grid_y = tf.concat(box_grid_y, axis=-1)

    # Compute indices for gather operation: for each sample point keep its
    # floor neighbor and floor+1 neighbor, clipped to [0, boundary].
    box_grid_y0 = tf.floor(box_grid_y)
    box_grid_x0 = tf.floor(box_grid_x)
    box_grid_x0 = tf.maximum(0., box_grid_x0)
    box_grid_y0 = tf.maximum(0., box_grid_y0)
    box_gridx0x1 = tf.stack(
        [
            tf.minimum(box_grid_x0, boundaries[:, :, 1:2]),
            tf.minimum(box_grid_x0 + 1, boundaries[:, :, 1:2])
        ],
        axis=3)
    box_gridy0y1 = tf.stack(
        [
            tf.minimum(box_grid_y0, boundaries[:, :, 0:1]),
            tf.minimum(box_grid_y0 + 1, boundaries[:, :, 0:1])
        ],
        axis=3)

    x_indices = tf.reshape(box_gridx0x1,
                           [batch_size, num_boxes, output_size * 2])
    y_indices = tf.reshape(box_gridy0y1,
                           [batch_size, num_boxes, output_size * 2])

    # If using GPU for inference, delay the cast until when Gather ops show up
    # since GPU inference supports float point better.
    # TODO(laigd): revisit this when newer versions of GPU libraries is
    # released.
    indices_dtype = tf.float32 if is_gpu_inference else tf.int32

    if not is_gpu_inference:
        x_indices = tf.cast(x_indices, tf.int32)
        y_indices = tf.cast(y_indices, tf.int32)

    # Linearize the 4-d (batch, level, y, x) coordinates into offsets into a
    # flattened feature tensor.
    height_dim_offset = max_feature_width
    level_dim_offset = max_feature_height * height_dim_offset
    batch_dim_offset = num_levels * level_dim_offset

    batch_dim_indices = (
        tf.reshape(
            tf.range(batch_size, dtype=indices_dtype) * batch_dim_offset,
            [batch_size, 1, 1, 1]) *
        tf.ones([1, num_boxes, output_size * 2, output_size * 2],
                dtype=indices_dtype)
    )
    box_level_indices = (
        tf.reshape(box_levels * level_dim_offset,
                   [batch_size, num_boxes, 1, 1]) *
        tf.ones([1, 1, output_size * 2, output_size * 2], dtype=indices_dtype)
    )
    height_indices = (
        tf.reshape(y_indices * height_dim_offset,
                   [batch_size, num_boxes, output_size * 2, 1]) *
        tf.ones([1, 1, 1, output_size * 2], dtype=indices_dtype)
    )
    width_indices = (
        tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]) *
        tf.ones([1, 1, output_size * 2, 1], dtype=indices_dtype)
    )

    # TODO(hongjunchoi): Remove the float32 round trip once int32 add_n is
    # well supported on GPUs. The original code guarded the branch below with
    # `if True:` and kept a dead `else:` branch doing a plain add_n; the dead
    # branch has been removed and the live path flattened (behavior
    # unchanged).
    batch_dim_indices = tf.cast(batch_dim_indices, tf.float32)
    box_level_indices = tf.cast(box_level_indices, tf.float32)
    height_indices = tf.cast(height_indices, tf.float32)
    width_indices = tf.cast(width_indices, tf.float32)

    indices = tf.add_n([
        batch_dim_indices,
        box_level_indices,
        height_indices,
        width_indices,
    ])

    indices = tf.cast(indices, tf.int32)

    if batch_size == 1:
        # Special handling for single batch input to make it friendly for GPU
        # inference.
        indices = tf.reshape(indices, [1, -1])

        if is_gpu_inference:
            indices = tf.cast(indices, dtype=tf.int32)

        features = tf.reshape(features, [1, -1, num_filters])
        # Cast should happen at last since GPU has better support for floating
        # point operations.
        features_per_box = tf.gather(features, indices, axis=1)
    else:
        indices = tf.reshape(indices, [-1])

        if is_gpu_inference:
            indices = tf.cast(indices, dtype=tf.int32)

        features = tf.reshape(features, [-1, num_filters])
        features_per_box = tf.gather(features, indices)

    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters]
    )

    # The RoIAlign feature f can be computed by bilinear interpolation of four
    # neighboring feature points f0, f1, f2, and f3.
    # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
    #                       [f10, f11]]
    # f(y, x) = (hy*hx)f00 + (hy*lx)f01 + (ly*hx)f10 + (lx*ly)f11
    # f(y, x) = w00*f00 + w01*f01 + w10*f10 + w11*f11
    ly = box_grid_y - box_grid_y0
    lx = box_grid_x - box_grid_x0
    hy = 1.0 - ly
    hx = 1.0 - lx
    kernel_x = tf.reshape(
        tf.stack([hx, lx], axis=3),
        [batch_size, num_boxes, 1, output_size * 2])
    kernel_y = tf.reshape(
        tf.stack([hy, ly], axis=3),
        [batch_size, num_boxes, output_size * 2, 1])

    # Use implicit broadcast to generate the interpolation kernel. The
    # multiplier `4` is for avg pooling.
    interpolation_kernel = kernel_y * kernel_x * 4

    # Interpolate the gathered features with computed interpolation kernels.
    features_per_box *= tf.cast(
        tf.expand_dims(interpolation_kernel, axis=4),
        dtype=features_per_box.dtype)
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size * num_boxes, output_size * 2, output_size * 2, num_filters]
    )
    # 2x2 average pooling over the weighted 2x2 neighborhoods collapses each
    # sample point's four neighbors into the final bilinear value.
    features_per_box = tf.nn.avg_pool2d(
        features_per_box,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='VALID')
    features_per_box = tf.reshape(
        features_per_box,
        [batch_size, num_boxes, output_size, output_size, num_filters])

    return features_per_box
def multilevel_crop_and_resize(features, boxes, output_size=7,
                               is_gpu_inference=False):
    """Crop and resize on multilevel feature pyramid.

    Generate the (output_size, output_size) set of pixels for each input box
    by first locating the box into the correct feature level, and then
    cropping and resizing it using the corresponding feature map of that
    level.

    Args:
        features: A dictionary with key as pyramid level and value as
            features. The features are in shape of
            [batch_size, height_l, width_l, num_filters].
        boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        output_size: A scalar to indicate the output crop size.
        is_gpu_inference: whether to build the model for GPU inference.
            When True, level computations stay in float32 (casts to int32 are
            skipped) until the gather in `selective_crop_and_resize`.

    Returns:
        A 5-D tensor representing feature crop of shape
        [batch_size, num_boxes, output_size, output_size, num_filters].
    """
    with tf.name_scope('multilevel_crop_and_resize'):
        levels = features.keys()
        min_level = min(levels)
        max_level = max(levels)
        # All levels are padded to the (largest) spatial size of the
        # lowest pyramid level so they can be stacked into one tensor.
        _, max_feature_height, max_feature_width, _ = (
            features[min_level].get_shape().as_list())

        # Stack feature pyramid into a features_all of shape
        # [batch_size, levels, height, width, num_filters].
        features_all = []

        for level in range(min_level, max_level + 1):
            features_all.append(
                tf.image.pad_to_bounding_box(features[level], 0, 0,
                                             max_feature_height,
                                             max_feature_width))

        features_all = tf.stack(features_all, axis=1)

        # Assign boxes to the right level.
        # Box sizes come from the [y1, x1, y2, x2] corner encoding.
        box_width = tf.squeeze(boxes[:, :, 3:4] - boxes[:, :, 1:2], axis=-1)
        box_height = tf.squeeze(boxes[:, :, 2:3] - boxes[:, :, 0:1], axis=-1)

        areas_sqrt = tf.sqrt(box_height * box_width)

        # FPN level assignment: floor(log2(sqrt(area) / 224)) + 4, i.e. a
        # 224x224 box maps to level 4.
        levels = tf.math.floordiv(
            tf.math.log(tf.divide(areas_sqrt, 224.0)),
            tf.math.log(2.0)) + 4.0

        if not is_gpu_inference:
            levels = tf.cast(levels, dtype=tf.int32)

        # Map levels between [min_level, max_level].
        # `levels` is float32 on the GPU-inference path and int32 otherwise,
        # so the clamp bounds must match that dtype.
        levels = tf.minimum(
            float(max_level) if is_gpu_inference else max_level,
            tf.maximum(levels,
                       float(min_level) if is_gpu_inference else min_level)
        )

        # Project box location and sizes to corresponding feature levels.
        # scale_to_level = 2^level, the stride of the assigned level.
        scale_to_level = tf.cast(
            tf.pow(tf.constant(2.0),
                   levels if is_gpu_inference
                   else tf.cast(levels, tf.float32)),
            dtype=boxes.dtype
        )

        boxes /= tf.expand_dims(scale_to_level, axis=2)
        box_width /= scale_to_level
        box_height /= scale_to_level
        # Re-encode boxes as [y1, x1, height, width] in level coordinates,
        # the layout expected by `selective_crop_and_resize`.
        boxes = tf.concat(
            [boxes[:, :, 0:2],
             tf.expand_dims(box_height, -1),
             tf.expand_dims(box_width, -1)],
            axis=-1
        )

        # Map levels to [0, max_level-min_level].
        levels -= min_level

        # Per-box valid (y, x) boundary of its level's feature map inside the
        # padded stack: padded_extent / 2^level - 1.
        level_strides = tf.pow(
            [[2.0]],
            levels if is_gpu_inference else tf.cast(levels, tf.float32))

        boundary = tf.cast(
            tf.concat(
                [
                    tf.expand_dims(
                        [[tf.cast(max_feature_height, tf.float32)]] /
                        level_strides - 1,
                        axis=-1
                    ),
                    tf.expand_dims(
                        [[tf.cast(max_feature_width, tf.float32)]] /
                        level_strides - 1,
                        axis=-1
                    ),
                ],
                axis=-1
            ),
            boxes.dtype
        )

        return selective_crop_and_resize(features_all, boxes, levels,
                                         boundary, output_size,
                                         is_gpu_inference)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/training_ops.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Training specific ops, including sampling, building targets, etc."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
from
mask_rcnn.utils
import
box_utils
from
mask_rcnn.ops
import
spatial_transform_ops
from
mask_rcnn.object_detection
import
balanced_positive_negative_sampler
# Small numeric fudge factor (presumably guarding divisions/logs against
# zero); its use sites are later in this module — TODO confirm.
_EPSILON = 1e-8
def _add_class_assignments(iou, gt_boxes, gt_labels):
    """Computes object category assignment for each box.

    Args:
        iou: a tensor for the iou matrix with a shape of
            [batch_size, K, MAX_NUM_INSTANCES]. K is the number of post-nms
            RoIs (i.e., rpn_post_nms_topn).
        gt_boxes: a tensor with a shape of
            [batch_size, MAX_NUM_INSTANCES, 4]. This tensor might have
            paddings with negative values. The coordinates of gt_boxes are in
            the pixel coordinates of the scaled image scale.
        gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
            This tensor might have paddings with a value of -1.

    Returns:
        max_boxes: a tensor with a shape of [batch_size, K, 4], representing
            the ground truth coordinates of each roi.
        max_classes: a int32 tensor with a shape of [batch_size, K],
            representing the ground truth class of each roi.
        max_overlap: a tensor with a shape of [batch_size, K], representing
            the maximum overlap of each roi.
        argmax_iou: a tensor with a shape of [batch_size, K], representing
            the iou argmax.
    """
    with tf.name_scope('add_class_assignments'):
        batch_size, _, _ = iou.get_shape().as_list()

        # For every RoI, the index of its best-overlapping ground truth box.
        argmax_iou = tf.argmax(input=iou, axis=2, output_type=tf.int32)
        # Flattened gather indices: offset each image's argmax values by that
        # image's start position in a flattened [batch * MAX_NUM_INSTANCES]
        # view of the ground truth tensors.
        indices = tf.reshape(
            argmax_iou +
            tf.expand_dims(
                tf.range(batch_size) * tf.shape(input=gt_labels)[1], 1),
            shape=[-1]
        )

        max_classes = tf.reshape(
            tf.gather(tf.reshape(gt_labels, [-1, 1]), indices),
            [batch_size, -1])

        max_overlap = tf.reduce_max(input_tensor=iou, axis=2)

        # RoIs with zero best overlap touch no ground truth box: force them
        # to the background class (0) and zero out their target box.
        bg_mask = tf.equal(max_overlap, tf.zeros_like(max_overlap))

        max_classes = tf.where(bg_mask, tf.zeros_like(max_classes),
                               max_classes)

        max_boxes = tf.reshape(
            tf.gather(tf.reshape(gt_boxes, [-1, 4]), indices),
            [batch_size, -1, 4]
        )

        max_boxes = tf.where(
            tf.tile(tf.expand_dims(bg_mask, axis=2), [1, 1, 4]),
            tf.zeros_like(max_boxes),
            max_boxes
        )

    return max_boxes, max_classes, max_overlap, argmax_iou
def encode_box_targets(boxes, gt_boxes, gt_labels, bbox_reg_weights):
    """Encodes predicted boxes with respect to ground truth boxes.

    Args:
        boxes: anchor/proposal boxes the targets are encoded against.
        gt_boxes: matched ground truth boxes for each proposal.
        gt_labels: matched ground truth class labels (0 = background).
        bbox_reg_weights: regression weights passed to the box encoder.

    Returns:
        Encoded regression targets, zeroed out for background proposals.
    """
    with tf.name_scope('encode_box_targets'):
        encoded_targets = box_utils.encode_boxes(
            boxes=gt_boxes, anchors=boxes, weights=bbox_reg_weights)
        # If a target is background, the encoded box target should be zeros.
        is_background = tf.equal(gt_labels, tf.zeros_like(gt_labels))
        background_mask = tf.tile(
            tf.expand_dims(is_background, axis=2), [1, 1, 4])
        encoded_targets = tf.where(
            background_mask, tf.zeros_like(encoded_targets), encoded_targets)
        return encoded_targets
def proposal_label_op(boxes, gt_boxes, gt_labels,
                      batch_size_per_im=512,
                      fg_fraction=0.25,
                      fg_thresh=0.5,
                      bg_thresh_hi=0.5,
                      bg_thresh_lo=0.):
    """Assigns the proposals with ground truth labels and performs subsampling.

    Given proposal `boxes`, `gt_boxes`, and `gt_labels`, the function uses the
    following algorithm to generate the final `batch_size_per_im` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposal box with a ground truth class and box label by
       choosing the largest overlap.
    3. Samples `batch_size_per_im` boxes from all proposal boxes, and returns
       box_targets, class_targets, and RoIs.
    The reference implementations of #1 and #2 are here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py
    The reference implementation of #3 is here:
    https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py

    Args:
        boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
            proposals before groundtruth assignment (e.g., rpn_post_nms_topn).
            The last dimension is the pixel coordinates of scaled images in
            [ymin, xmin, ymax, xmax] form.
        gt_boxes: a tensor with a shape of
            [batch_size, MAX_NUM_INSTANCES, 4]. This tensor might have
            paddings with a value of -1. The coordinates of gt_boxes are in
            the pixel coordinates of the scaled image.
        gt_labels: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
            This tensor might have paddings with a value of -1.
        batch_size_per_im: a integer represents RoI minibatch size per image.
        fg_fraction: a float represents the target fraction of RoI minibatch
            that is labeled foreground (i.e., class > 0).
        fg_thresh: a float represents the overlap threshold for an RoI to be
            considered foreground (if >= fg_thresh).
        bg_thresh_hi: a float represents the overlap threshold for an RoI to
            be considered background (class = 0 if overlap in [LO, HI)).
        bg_thresh_lo: a float represents the overlap threshold for an RoI to
            be considered background (class = 0 if overlap in [LO, HI)).

    Returns:
        box_targets: a tensor with a shape of [batch_size, K, 4]. The tensor
            contains the ground truth pixel coordinates of the scaled images
            for each roi. K is the number of sample RoIs (e.g.,
            batch_size_per_im).
        class_targets: a integer tensor with a shape of [batch_size, K]. The
            tensor contains the ground truth class for each roi.
        rois: a tensor with a shape of [batch_size, K, 4], representing the
            coordinates of the selected RoI.
        proposal_to_label_map: a tensor with a shape of [batch_size, K]. This
            tensor keeps the mapping between proposal to labels.
            proposal_to_label_map[i] means the index of the ground truth
            instance for the i-th proposal.
    """
    with tf.name_scope('proposal_label'):
        batch_size = boxes.shape[0]

        # The reference implementation intentionally includes ground truth
        # boxes in the proposals.
        # see https://github.com/facebookresearch/Detectron/blob/master/detectron/datasets/json_dataset.py#L359
        boxes = tf.concat([boxes, gt_boxes], axis=1)
        iou = box_utils.bbox_overlap(boxes, gt_boxes)

        (pre_sample_box_targets, pre_sample_class_targets, max_overlap,
         proposal_to_label_map) = _add_class_assignments(
             iou, gt_boxes, gt_labels)

        # Generates a random sample of RoIs comprising foreground and
        # background examples.
        # reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py#L132
        positives = tf.greater(max_overlap,
                               fg_thresh * tf.ones_like(max_overlap))
        negatives = tf.logical_and(
            tf.greater_equal(max_overlap,
                             bg_thresh_lo * tf.ones_like(max_overlap)),
            tf.less(max_overlap,
                    bg_thresh_hi * tf.ones_like(max_overlap))
        )
        # Background proposals get class 0 and no ground-truth mapping.
        pre_sample_class_targets = tf.where(
            negatives,
            tf.zeros_like(pre_sample_class_targets),
            pre_sample_class_targets
        )
        proposal_to_label_map = tf.where(
            negatives,
            tf.zeros_like(proposal_to_label_map),
            proposal_to_label_map
        )

        # Handles ground truth paddings: padded gt entries carry negative
        # values, which make the per-RoI minimum IoU negative.
        ignore_mask = tf.less(
            tf.reduce_min(input_tensor=iou, axis=2),
            tf.zeros_like(max_overlap))

        # indicator includes both positive and negative labels.
        # labels includes only positives labels.
        # positives = indicator & labels.
        # negatives = indicator & !labels.
        # ignore = !indicator.
        labels = positives
        pos_or_neg = tf.logical_or(positives, negatives)
        indicator = tf.logical_and(pos_or_neg, tf.logical_not(ignore_mask))

        all_samples = []
        sampler = (
            balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
                positive_fraction=fg_fraction,
                is_static=True
            )
        )

        # Batch-unroll the sub-sampling process.
        for i in range(batch_size):
            samples = sampler.subsample(indicator[i], batch_size_per_im,
                                        labels[i])
            all_samples.append(samples)

        # Stack the per-image boolean masks into one [batch_size, ...]
        # tensor (the wrap-in-list + [0] is equivalent to a plain stack).
        all_samples = tf.stack([all_samples], axis=0)[0]

        # A workaround to get the indices from the boolean tensors.
        _, samples_indices = tf.nn.top_k(
            tf.cast(all_samples, dtype=tf.int32),
            k=batch_size_per_im,
            sorted=True
        )
        # Constructs indices for gather: offset each image's indices by its
        # start position in a flattened [batch * N] view.
        samples_indices = tf.reshape(
            samples_indices +
            tf.expand_dims(
                tf.range(batch_size) * tf.shape(input=boxes)[1], 1),
            [-1]
        )
        rois = tf.reshape(
            tf.gather(tf.reshape(boxes, [-1, 4]), samples_indices),
            [batch_size, -1, 4]
        )
        class_targets = tf.reshape(
            tf.gather(
                tf.reshape(pre_sample_class_targets, [-1, 1]),
                samples_indices),
            [batch_size, -1]
        )
        sample_box_targets = tf.reshape(
            tf.gather(
                tf.reshape(pre_sample_box_targets, [-1, 4]),
                samples_indices),
            [batch_size, -1, 4]
        )
        sample_proposal_to_label_map = tf.reshape(
            tf.gather(
                tf.reshape(proposal_to_label_map, [-1, 1]),
                samples_indices),
            [batch_size, -1]
        )

    return (sample_box_targets, class_targets, rois,
            sample_proposal_to_label_map)
def select_fg_for_masks(class_targets, box_targets, boxes,
                        proposal_to_label_map, max_num_fg=128):
    """Selects the fore ground objects for mask branch during training.

    Args:
        class_targets: a tensor of shape [batch_size, num_boxes] representing
            the class label for each box.
        box_targets: a tensor with a shape of [batch_size, num_boxes, 4]. The
            tensor contains the ground truth pixel coordinates of the scaled
            images for each roi.
        boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
            represents a box with [y1, x1, y2, x2] in un-normalized
            coordinates.
        proposal_to_label_map: a tensor with a shape of
            [batch_size, num_boxes]. This tensor keeps the mapping between
            proposal to labels. proposal_to_label_map[i] means the index of
            the ground truth instance for the i-th proposal.
        max_num_fg: a integer represents the number of masks per image.

    Returns:
        class_targets, boxes, proposal_to_label_map, box_targets that have
        foreground objects.
    """
    # Masks are for positive (fg) objects only.
    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py
    batch_size = boxes.shape[0]

    # top_k over the 0/1 foreground indicator yields the positions of up to
    # `max_num_fg` foreground boxes per image.
    _, fg_indices = tf.nn.top_k(
        tf.cast(tf.greater(class_targets, 0), dtype=tf.float32),
        k=max_num_fg)

    # Constructs indices for gather: offset each image's indices by its start
    # position in a flattened [batch * num_boxes] view.
    row_offsets = tf.expand_dims(
        tf.range(batch_size) * tf.shape(input=class_targets)[1], 1)
    flat_indices = tf.reshape(fg_indices + row_offsets, [-1])

    def _take(tensor, last_dim, out_shape):
        # Gathers the selected foreground rows from a flattened view.
        flat = tf.reshape(tensor, [-1, last_dim])
        return tf.reshape(tf.gather(flat, flat_indices), out_shape)

    fg_class_targets = _take(class_targets, 1, [batch_size, -1])
    fg_box_targets = _take(box_targets, 4, [batch_size, -1, 4])
    fg_box_rois = _take(boxes, 4, [batch_size, -1, 4])
    fg_proposal_to_label_map = _take(proposal_to_label_map, 1,
                                     [batch_size, -1])

    return (fg_class_targets, fg_box_targets, fg_box_rois,
            fg_proposal_to_label_map)
def get_mask_targets(fg_boxes, fg_proposal_to_label_map, fg_box_targets,
                     mask_gt_labels, output_size=28):
    """Crop and resize on multilevel feature pyramid.

    Args:
      fg_boxes: A 3-D tensor of shape [batch_size, num_masks, 4]. Each row
        represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
      fg_proposal_to_label_map: A tensor of shape [batch_size, num_masks].
      fg_box_targets: a float tensor representing the box label for each box
        with a shape of [batch_size, num_masks, 4].
      mask_gt_labels: A tensor with a shape of [batch_size, M, H+4, W+4]. M is
        NUM_MAX_INSTANCES (i.e., 100 in this implementation) in each image,
        while H and W are ground truth mask size. The `+4` comes from padding
        of two zeros in both directions of height and width dimension.
      output_size: A scalar to indicate the output crop size.

    Returns:
      A 4-D tensor representing feature crop of shape
      [batch_size, num_boxes, output_size, output_size].
    """
    _, _, max_feature_height, max_feature_width = (
        mask_gt_labels.get_shape().as_list())

    # proposal_to_label_map might have a -1 paddings.
    levels = tf.maximum(fg_proposal_to_label_map, 0)

    # Projects box location and sizes to corresponding cropped ground truth
    # mask coordinates.
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
        value=fg_boxes, num_or_size_splits=4, axis=2)
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
        value=fg_box_targets, num_or_size_splits=4, axis=2)

    # Ground-truth masks carry a 2-pixel zero border on every side, so the
    # usable region is 4 smaller than the stored feature map.
    valid_feature_width = max_feature_width - 4
    valid_feature_height = max_feature_height - 4

    # Map the proposal box into the ground-truth mask's pixel frame; the
    # trailing `+ 2` shifts past the zero border. `_EPSILON` (module-level)
    # guards against degenerate zero-area ground-truth boxes.
    y_transform = (bb_y_min - gt_y_min) * valid_feature_height / (
        gt_y_max - gt_y_min + _EPSILON) + 2
    x_transform = (bb_x_min - gt_x_min) * valid_feature_width / (
        gt_x_max - gt_x_min + _EPSILON) + 2
    h_transform = (bb_y_max - bb_y_min) * valid_feature_height / (
        gt_y_max - gt_y_min + _EPSILON)
    w_transform = (bb_x_max - bb_x_min) * valid_feature_width / (
        gt_x_max - gt_x_min + _EPSILON)

    # Per-box sampling boundaries: the last valid pixel index in each axis.
    boundaries = tf.concat(
        [
            tf.cast(
                tf.ones_like(y_transform) * (max_feature_height - 1),
                dtype=tf.float32),
            tf.cast(
                tf.ones_like(x_transform) * (max_feature_width - 1),
                dtype=tf.float32)
        ],
        axis=-1)

    features_per_box = spatial_transform_ops.selective_crop_and_resize(
        tf.expand_dims(mask_gt_labels, -1),
        tf.concat([y_transform, x_transform, h_transform, w_transform], -1),
        tf.expand_dims(levels, -1),
        boundaries,
        output_size)
    features_per_box = tf.squeeze(features_per_box, axis=-1)

    # Masks are binary outputs.
    features_per_box = tf.where(
        tf.greater_equal(features_per_box, 0.5),
        tf.ones_like(features_per_box),
        tf.zeros_like(features_per_box))

    # mask_targets depend on box RoIs, which have gradients. This stop_gradient
    # prevents the flow of gradient to box RoIs.
    features_per_box = tf.stop_gradient(features_per_box)
    return features_per_box
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/__init__.py
0 → 100644
View file @
c320b6ef
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/learning_rates.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Learning rate schedule."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow
as
tf
def step_learning_rate_with_linear_warmup(global_step,
                                          init_learning_rate,
                                          warmup_learning_rate,
                                          warmup_steps,
                                          learning_rate_levels,
                                          learning_rate_steps):
    """Creates the step learning rate tensor with linear warmup.

    For steps below `warmup_steps` the rate ramps linearly from
    `warmup_learning_rate` up to `init_learning_rate`; afterwards it follows a
    piecewise-constant schedule defined by `learning_rate_steps` (boundaries)
    and `learning_rate_levels` (values after each boundary).
    """
    def _warmup_rate():
        # Linear interpolation between the warmup and initial learning rates.
        progress = tf.cast(global_step, dtype=tf.float32) / warmup_steps
        return warmup_learning_rate + progress * (
            init_learning_rate - warmup_learning_rate)

    def _stepwise_rate():
        return tf.compat.v1.train.piecewise_constant(
            global_step,
            boundaries=learning_rate_steps,
            values=[init_learning_rate] + learning_rate_levels)

    # Both branches are built eagerly; tf.where selects per current step.
    return tf.where(global_step < warmup_steps, _warmup_rate(),
                    _stepwise_rate())
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/training/losses.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Losses used for Mask-RCNN."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
distutils.version
import
LooseVersion
import
tensorflow
as
tf
# When True, each loss helper additionally builds the tf.compat.v1 reference
# loss and prints both values so the two implementations can be compared.
DEBUG_LOSS_IMPLEMENTATION = False

# Resolve the keras loss-reduction enum across TF versions: before TF 2.0 it
# lives in a private keras utils module; from 2.0 on it is public API.
if LooseVersion(tf.__version__) < LooseVersion("2.0.0"):
    from tensorflow.python.keras.utils import losses_utils
    ReductionV2 = losses_utils.ReductionV2
else:
    ReductionV2 = tf.keras.losses.Reduction
def _huber_loss(y_true, y_pred, weights, delta):
    """Computes a weighted Huber loss normalized by the non-zero weight count.

    Args:
      y_true: target tensor.
      y_pred: prediction tensor, same shape as `y_true`.
      weights: float32 tensor of per-element weights; elements with zero weight
        do not contribute, and the summed loss is divided by the number of
        non-zero weights (SUM_BY_NONZERO_WEIGHTS semantics).
      delta: the Huber-loss transition point between quadratic and linear.

    Returns:
      A scalar float32 loss tensor.
    """
    num_non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)

    huber_keras_loss = tf.keras.losses.Huber(
        delta=delta,
        reduction=ReductionV2.SUM,
        name='huber_loss')

    # From TF 2.2 keras.losses.Huber averages over the last axis; adding a
    # trailing singleton axis keeps the result identical to older versions.
    if LooseVersion(tf.__version__) >= LooseVersion("2.2.0"):
        y_true = tf.expand_dims(y_true, axis=-1)
        y_pred = tf.expand_dims(y_pred, axis=-1)

    huber_loss = huber_keras_loss(y_true, y_pred, sample_weight=weights)
    assert huber_loss.dtype == tf.float32

    # divide_no_nan yields 0 when every weight is zero instead of NaN.
    huber_loss = tf.math.divide_no_nan(huber_loss, num_non_zeros,
                                       name="huber_loss")
    assert huber_loss.dtype == tf.float32

    if DEBUG_LOSS_IMPLEMENTATION:
        # Cross-check against the tf.compat.v1 reference implementation.
        mlperf_loss = tf.compat.v1.losses.huber_loss(
            y_true,
            y_pred,
            weights=weights,
            delta=delta,
            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)
        print_op = tf.print("Huber Loss - MLPerf:", mlperf_loss,
                            " && Legacy Loss:", huber_loss)
        with tf.control_dependencies([print_op]):
            huber_loss = tf.identity(huber_loss)

    return huber_loss
def _sigmoid_cross_entropy(multi_class_labels, logits, weights,
                           sum_by_non_zeros_weights=False):
    """Computes a weighted, summed sigmoid cross-entropy loss.

    Args:
      multi_class_labels: per-element binary labels, same shape as `logits`.
      logits: raw (pre-sigmoid) predictions.
      weights: float32 per-element weights.
      sum_by_non_zeros_weights: if True, the summed loss is additionally
        divided by the number of non-zero weights (SUM_BY_NONZERO_WEIGHTS);
        otherwise a plain SUM reduction is returned.

    Returns:
      A scalar float32 loss tensor.
    """
    assert weights.dtype == tf.float32

    sigmoid_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=multi_class_labels, logits=logits, name="x-entropy")

    assert sigmoid_cross_entropy.dtype == tf.float32

    sigmoid_cross_entropy = tf.math.multiply(sigmoid_cross_entropy, weights)
    sigmoid_cross_entropy = tf.math.reduce_sum(sigmoid_cross_entropy)

    assert sigmoid_cross_entropy.dtype == tf.float32

    if sum_by_non_zeros_weights:
        num_non_zeros = tf.math.count_nonzero(weights, dtype=tf.float32)
        # divide_no_nan yields 0 when every weight is zero instead of NaN.
        sigmoid_cross_entropy = tf.math.divide_no_nan(
            sigmoid_cross_entropy,
            num_non_zeros,
            name="sum_by_non_zeros_weights")

    assert sigmoid_cross_entropy.dtype == tf.float32

    if DEBUG_LOSS_IMPLEMENTATION:
        # Cross-check against the tf.compat.v1 reference implementation.
        if sum_by_non_zeros_weights:
            reduction = tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS
        else:
            reduction = tf.compat.v1.losses.Reduction.SUM

        mlperf_loss = tf.compat.v1.losses.sigmoid_cross_entropy(
            multi_class_labels=multi_class_labels,
            logits=logits,
            weights=weights,
            reduction=reduction)

        print_op = tf.print("Sigmoid X-Entropy Loss (%s) - MLPerf:" % reduction,
                            mlperf_loss, " && Legacy Loss:",
                            sigmoid_cross_entropy)

        with tf.control_dependencies([print_op]):
            sigmoid_cross_entropy = tf.identity(sigmoid_cross_entropy)

    return sigmoid_cross_entropy
def _softmax_cross_entropy(onehot_labels, logits):
    """Computes softmax cross-entropy normalized by the non-zero label count.

    Args:
      onehot_labels: one-hot encoded labels; the number of non-zero entries is
        used as the normalizer (SUM_BY_NONZERO_WEIGHTS semantics).
      logits: raw (pre-softmax) class predictions, same shape as
        `onehot_labels`.

    Returns:
      A scalar float32 loss tensor.
    """
    num_non_zeros = tf.math.count_nonzero(onehot_labels, dtype=tf.float32)

    softmax_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=onehot_labels, logits=logits)

    assert softmax_cross_entropy.dtype == tf.float32

    softmax_cross_entropy = tf.math.reduce_sum(softmax_cross_entropy)
    # divide_no_nan yields 0 when there are no labels instead of NaN.
    softmax_cross_entropy = tf.math.divide_no_nan(
        softmax_cross_entropy, num_non_zeros, name="softmax_cross_entropy")

    assert softmax_cross_entropy.dtype == tf.float32

    if DEBUG_LOSS_IMPLEMENTATION:
        # Cross-check against the tf.compat.v1 reference implementation.
        mlperf_loss = tf.compat.v1.losses.softmax_cross_entropy(
            onehot_labels=onehot_labels,
            logits=logits,
            reduction=tf.compat.v1.losses.Reduction.SUM_BY_NONZERO_WEIGHTS)

        print_op = tf.print("Softmax X-Entropy Loss - MLPerf:", mlperf_loss,
                            " && Legacy Loss:", softmax_cross_entropy)

        with tf.control_dependencies([print_op]):
            softmax_cross_entropy = tf.identity(softmax_cross_entropy)

    return softmax_cross_entropy
def _rpn_score_loss(score_outputs, score_targets, normalizer=1.0):
    """Computes the RPN objectness (sigmoid cross-entropy) score loss.

    score_targets has three values:
      * score_targets[i] = 1: the anchor is a positive sample.
      * score_targets[i] = 0: negative.
      * score_targets[i] = -1: the anchor is don't care (ignore).
    """
    with tf.name_scope('rpn_score_loss'):
        # Anchors labeled 0 or 1 are valid; "don't care" (-1) anchors get
        # zero weight so they do not contribute to the loss.
        valid_mask = tf.cast(
            tf.math.greater_equal(score_targets, 0), dtype=tf.float32)
        # Clamp -1 labels up to 0 so every entry is a well-formed binary
        # label; the clamped entries are masked out anyway.
        binary_targets = tf.cast(
            tf.maximum(score_targets, tf.zeros_like(score_targets)),
            dtype=tf.float32)

        assert score_outputs.dtype == tf.float32
        assert binary_targets.dtype == tf.float32

        score_loss = _sigmoid_cross_entropy(
            multi_class_labels=binary_targets,
            logits=score_outputs,
            weights=valid_mask,
            sum_by_non_zeros_weights=False)

        assert score_loss.dtype == tf.float32

        # Skip the division entirely for the default scalar normalizer.
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            score_loss /= normalizer

        assert score_loss.dtype == tf.float32
        return score_loss
def _rpn_box_loss(box_outputs, box_targets, normalizer=1.0, delta=1. / 9):
    """Computes the RPN box-regression (Huber / smooth-L1) loss.

    `delta` is typically around the mean value of the regression target; for
    instance, the regression targets of a 512x512 input with 6 anchors on the
    P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
    """
    with tf.name_scope('rpn_box_loss'):
        # Anchors whose target is exactly 0.0 are unassigned; give them zero
        # weight so only assigned anchors contribute.
        assigned_mask = tf.cast(tf.not_equal(box_targets, 0.0), tf.float32)
        assert assigned_mask.dtype == tf.float32

        # _huber_loss already normalizes by the count of non-zero weights
        # before the caller-supplied normalizer is applied.
        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs,
                               weights=assigned_mask, delta=delta)
        assert box_loss.dtype == tf.float32

        # Skip the division entirely for the default scalar normalizer.
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            box_loss /= normalizer

        assert box_loss.dtype == tf.float32
        return box_loss
def rpn_loss(score_outputs, box_outputs, labels, params):
    """Computes total RPN detection loss.

    Computes total RPN detection loss including box and score from all levels.

    Args:
      score_outputs: an OrderDict with keys representing levels and values
        representing scores in [batch_size, height, width, num_anchors].
      box_outputs: an OrderDict with keys representing levels and values
        representing box regression targets in
        [batch_size, height, width, num_anchors * 4].
      labels: the dictionary that returned from dataloader that includes
        groundtruth targets.
      params: the dictionary including training parameters specified in
        default_hparams function in this file.

    Returns:
      total_rpn_loss: a float tensor representing total loss reduced from
        score and box losses from all levels.
      rpn_score_loss: a float tensor representing total score loss.
      rpn_box_loss: a float tensor representing total box regression loss.
    """
    with tf.name_scope('rpn_loss'):

        score_losses = []
        box_losses = []

        # One score loss and one box loss per pyramid level.
        for level in range(int(params['min_level']),
                           int(params['max_level'] + 1)):
            score_targets_at_level = labels['score_targets_%d' % level]
            box_targets_at_level = labels['box_targets_%d' % level]

            # Score loss is normalized by the total number of sampled anchors
            # across the batch; box loss relies on _huber_loss's internal
            # non-zero-weight normalization (normalizer=1.0).
            score_losses.append(
                _rpn_score_loss(
                    score_outputs=score_outputs[level],
                    score_targets=score_targets_at_level,
                    normalizer=tf.cast(
                        params['train_batch_size'] *
                        params['rpn_batch_size_per_im'],
                        dtype=tf.float32)))

            box_losses.append(
                _rpn_box_loss(
                    box_outputs=box_outputs[level],
                    box_targets=box_targets_at_level,
                    normalizer=1.0))

        # Sum per level losses to total loss.
        rpn_score_loss = tf.add_n(score_losses)
        rpn_box_loss = params['rpn_box_loss_weight'] * tf.add_n(box_losses)

        total_rpn_loss = rpn_score_loss + rpn_box_loss

        return total_rpn_loss, rpn_score_loss, rpn_box_loss
def _fast_rcnn_class_loss(class_outputs, class_targets_one_hot,
                          normalizer=1.0):
    """Computes the Fast-RCNN classification (softmax cross-entropy) loss."""
    with tf.name_scope('fast_rcnn_class_loss'):
        # _softmax_cross_entropy already normalizes by the number of non-zero
        # labels before the caller-supplied normalizer is applied.
        class_loss = _softmax_cross_entropy(
            onehot_labels=class_targets_one_hot, logits=class_outputs)

        # Skip the division entirely for the default scalar normalizer.
        needs_extra_norm = (
            isinstance(normalizer, tf.Tensor) or normalizer != 1.0)
        if needs_extra_norm:
            class_loss /= normalizer

        return class_loss
def _fast_rcnn_box_loss(box_outputs, box_targets, class_targets,
                        normalizer=1.0, delta=1.):
    """Computes the Fast-RCNN box-regression (Huber) loss.

    `delta` is typically around the mean value of the regression target; for
    instance, the regression targets of a 512x512 input with 6 anchors on the
    P2-P6 pyramid are about [0.1, 0.1, 0.2, 0.2].
    """
    with tf.name_scope('fast_rcnn_box_loss'):
        # Only RoIs matched to a foreground class (label > 0) contribute;
        # broadcast the per-RoI mask across the 4 box coordinates.
        fg_mask = tf.tile(
            tf.expand_dims(tf.greater(class_targets, 0), axis=2), [1, 1, 4])

        # _huber_loss already normalizes by the count of non-zero weights
        # before the caller-supplied normalizer is applied.
        box_loss = _huber_loss(y_true=box_targets, y_pred=box_outputs,
                               weights=fg_mask, delta=delta)

        # Skip the division entirely for the default scalar normalizer.
        if isinstance(normalizer, tf.Tensor) or normalizer != 1.0:
            box_loss /= normalizer

        return box_loss
def fast_rcnn_loss(class_outputs, box_outputs, class_targets, box_targets,
                   params):
    """Computes the box and class loss (Fast-RCNN branch) of Mask-RCNN.

    This function implements the classification and box regression loss of the
    Fast-RCNN branch in Mask-RCNN. As the `box_outputs` produces `num_classes`
    boxes for each RoI, the reference model expands `box_targets` to match the
    shape of `box_outputs` and selects only the target that the RoI has a
    maximum overlap.
    (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/fast_rcnn.py)
    Instead, this function selects the `box_outputs` by the `class_targets` so
    that it doesn't expand `box_targets`.

    The loss computation has two parts: (1) classification loss is softmax on
    all RoIs. (2) box loss is smooth L1-loss on only positive samples of RoIs.
    Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/modeling/fast_rcnn_heads.py

    Args:
      class_outputs: a float tensor representing the class prediction for each
        box with a shape of [batch_size, num_boxes, num_classes].
      box_outputs: a float tensor representing the box prediction for each box
        with a shape of [batch_size, num_boxes, num_classes * 4].
      class_targets: a float tensor representing the class label for each box
        with a shape of [batch_size, num_boxes].
      box_targets: a float tensor representing the box label for each box
        with a shape of [batch_size, num_boxes, 4].
      params: the dictionary including training parameters specified in
        default_hparams function in this file.

    Returns:
      total_loss: a float tensor representing total loss reducing from
        class and box losses from all levels.
      cls_loss: a float tensor representing total class loss.
      box_loss: a float tensor representing total box regression loss.
    """
    with tf.name_scope('fast_rcnn_loss'):
        class_targets = tf.cast(class_targets, dtype=tf.int32)

        # Selects the box from `box_outputs` based on `class_targets`, with
        # which the box has the maximum overlap.
        batch_size, num_rois, _ = box_outputs.get_shape().as_list()
        box_outputs = tf.reshape(
            box_outputs, [batch_size, num_rois, params['num_classes'], 4])

        # Flat index of each RoI's target-class box inside the fully
        # flattened [batch * roi * class] prediction tensor: target class +
        # per-image offset + per-RoI offset.
        box_indices = tf.reshape(
            class_targets + tf.tile(
                tf.expand_dims(
                    tf.range(batch_size) * num_rois * params['num_classes'],
                    1), [1, num_rois]) + tf.tile(
                        tf.expand_dims(
                            tf.range(num_rois) * params['num_classes'], 0),
                        [batch_size, 1]), [-1])

        # One-hot matmul acts as a gather over the flattened predictions
        # (keeps the op XLA/TPU friendly compared with tf.gather).
        box_outputs = tf.matmul(
            tf.one_hot(
                box_indices,
                batch_size * num_rois * params['num_classes'],
                dtype=box_outputs.dtype), tf.reshape(box_outputs, [-1, 4]))

        box_outputs = tf.reshape(box_outputs, [batch_size, -1, 4])

        box_loss = _fast_rcnn_box_loss(
            box_outputs=box_outputs,
            box_targets=box_targets,
            class_targets=class_targets,
            normalizer=1.0)

        box_loss *= params['fast_rcnn_box_loss_weight']

        # Dense one-hot cross entropy is used (sparse variant kept as a
        # switch for experimentation).
        use_sparse_x_entropy = False

        _class_targets = class_targets if use_sparse_x_entropy else tf.one_hot(
            class_targets, params['num_classes'])

        class_loss = _fast_rcnn_class_loss(
            class_outputs=class_outputs,
            class_targets_one_hot=_class_targets,
            normalizer=1.0)

        total_loss = class_loss + box_loss

        return total_loss, class_loss, box_loss
def mask_rcnn_loss(mask_outputs, mask_targets, select_class_targets, params):
    """Computes the mask loss of Mask-RCNN.

    This function implements the mask loss of Mask-RCNN. As the `mask_outputs`
    produces `num_classes` masks for each RoI, the reference model expands
    `mask_targets` to match the shape of `mask_outputs` and selects only the
    target that the RoI has a maximum overlap.
    (Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/roi_data/mask_rcnn.py)
    Instead, this implementation selects the `mask_outputs` by the
    `class_targets` so that it doesn't expand `mask_targets`. Note that the
    selection logic is done in the post-processing of mask_rcnn_fn in
    mask_rcnn_architecture.py.

    Args:
      mask_outputs: a float tensor representing the prediction for each mask,
        with a shape of [batch_size, num_masks, mask_height, mask_width].
      mask_targets: a float tensor representing the binary mask of ground
        truth labels for each mask with a shape of
        [batch_size, num_masks, mask_height, mask_width].
      select_class_targets: a tensor with a shape of [batch_size, num_masks],
        representing the foreground mask targets.
      params: the dictionary including training parameters specified in
        default_hparams function in this file.

    Returns:
      mask_loss: a float tensor representing total mask loss.
    """
    with tf.name_scope('mask_loss'):
        batch_size, num_masks, mask_height, mask_width = (
            mask_outputs.get_shape().as_list())

        # Only foreground masks (class label > 0) contribute; broadcast the
        # per-mask indicator over every pixel of the mask.
        weights = tf.tile(
            tf.reshape(
                tf.greater(select_class_targets, 0),
                [batch_size, num_masks, 1, 1]),
            [1, 1, mask_height, mask_width])
        weights = tf.cast(weights, tf.float32)

        loss = _sigmoid_cross_entropy(
            multi_class_labels=mask_targets,
            logits=mask_outputs,
            weights=weights,
            sum_by_non_zeros_weights=True)

        mrcnn_loss = params['mrcnn_weight_loss_mask'] * loss

        return mrcnn_loss
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/__init__.py
0 → 100644
View file @
c320b6ef
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/box_utils.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions to manipulate boxes."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
# Standard Imports
import
numpy
as
np
import
tensorflow
as
tf
# Clip value for regressed box-size deltas, log(1000 / 16); bounds exp() of
# the deltas during box decoding so degenerate predictions cannot overflow.
BBOX_XFORM_CLIP = np.log(1000. / 16.)
# Number of boxes processed per tile by the batched, tiled NMS below.
NMS_TILE_SIZE = 512
def bbox_overlap(boxes, gt_boxes):
    """Calculates the overlap between proposal and ground truth boxes.

    Some `gt_boxes` may have been padded. The returned `iou` tensor for these
    boxes will be -1.

    Args:
      boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
        last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax]
        form.
      gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
        This tensor might have paddings with a negative value.

    Returns:
      iou: a tensor with as a shape of [batch_size, N, MAX_NUM_INSTANCES].
    """
    with tf.name_scope('bbox_overlap'):
        bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
            value=boxes, num_or_size_splits=4, axis=2)
        gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
            value=gt_boxes, num_or_size_splits=4, axis=2)

        # Calculates the intersection area. Transposing the gt coordinates to
        # [batch, 1, M] broadcasts them against the [batch, N, 1] proposals,
        # producing all N x M pairs in one shot.
        i_xmin = tf.maximum(bb_x_min, tf.transpose(a=gt_x_min, perm=[0, 2, 1]))
        i_xmax = tf.minimum(bb_x_max, tf.transpose(a=gt_x_max, perm=[0, 2, 1]))
        i_ymin = tf.maximum(bb_y_min, tf.transpose(a=gt_y_min, perm=[0, 2, 1]))
        i_ymax = tf.minimum(bb_y_max, tf.transpose(a=gt_y_max, perm=[0, 2, 1]))
        i_area = tf.maximum((i_xmax - i_xmin), 0) * tf.maximum(
            (i_ymax - i_ymin), 0)

        # Calculates the union area.
        bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
        gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)

        # Adds a small epsilon to avoid divide-by-zero.
        u_area = bb_area + tf.transpose(a=gt_area, perm=[0, 2, 1]) - i_area + 1e-8

        # Calculates IoU.
        iou = i_area / u_area

        # Fills -1 for padded ground truth boxes: padded boxes carry negative
        # coordinates, which makes the broadcast i_xmin negative.
        padding_mask = tf.less(i_xmin, tf.zeros_like(i_xmin))
        iou = tf.where(padding_mask, -tf.ones_like(iou), iou)

        return iou
def top_k(scores, k, boxes_list):
    """A wrapper that returns top-k scores and corresponding boxes.

    This function selects the top-k scores and boxes as follows.

    indices = argsort(scores)[:k]
    scores = scores[indices]
    outputs = []
    for boxes in boxes_list:
      outputs.append(boxes[indices, :])
    return scores, outputs

    Args:
      scores: a tensor with a shape of [batch_size, N]. N is the number of
        scores.
      k: an integer for selecting the top-k elements.
      boxes_list: a list containing at least one element. Each element has a
        shape of [batch_size, N, 4].

    Returns:
      scores: the selected top-k scores with a shape of [batch_size, k].
      outputs: the list containing the corresponding boxes in the order of the
        input `boxes_list`.
    """
    assert isinstance(boxes_list, list)
    assert boxes_list  # not empty list

    batch_size, _ = scores.get_shape().as_list()

    scores, top_k_indices = tf.nn.top_k(scores, k=k)

    outputs = []
    for boxes in boxes_list:
        if batch_size == 1:
            # Single image: gather along the box axis directly.
            boxes = tf.squeeze(
                tf.gather(boxes, top_k_indices, axis=1), axis=1)
        else:
            # Batched case: offset each image's indices into the flattened
            # [batch * N, 4] tensor and gather once.
            boxes_index_offsets = tf.range(batch_size) * tf.shape(
                input=boxes)[1]
            boxes_indices = tf.reshape(
                top_k_indices + tf.expand_dims(boxes_index_offsets, 1), [-1])
            boxes = tf.reshape(
                tf.gather(tf.reshape(boxes, [-1, 4]), boxes_indices),
                [batch_size, -1, 4])
        outputs.append(boxes)
    return scores, outputs
def _self_suppression(iou, _, iou_sum):
    """One iteration of self-suppression within an NMS tile.

    tf.while_loop body: boxes that are themselves suppressed (their max
    incoming IoU exceeds 0.5) lose the ability to suppress others, so their
    rows of `iou` are zeroed out. The loop continues while the total IoU mass
    keeps dropping.

    Args:
      iou: [batch, tile, tile] pairwise IoU within the tile.
      _: unused loop-continuation flag from the previous iteration.
      iou_sum: [batch] sum of `iou` from the previous iteration.

    Returns:
      [updated iou, whether anything changed, new iou sum] loop variables.
    """
    batch_size = tf.shape(input=iou)[0]
    # A box can suppress others only if nothing suppresses it (max incoming
    # IoU <= 0.5).
    can_suppress_others = tf.cast(
        tf.reshape(
            tf.reduce_max(input_tensor=iou, axis=1) <= 0.5,
            [batch_size, -1, 1]), iou.dtype)
    # Keep only the IoU rows of boxes that survive the current suppressors.
    iou_suppressed = tf.reshape(
        tf.cast(
            tf.reduce_max(input_tensor=can_suppress_others * iou, axis=1) <= 0.5,
            iou.dtype), [batch_size, -1, 1]) * iou
    iou_sum_new = tf.reduce_sum(input_tensor=iou_suppressed, axis=[1, 2])
    return [
        iou_suppressed,
        # Continue while the IoU mass is still shrinking.
        tf.reduce_any(input_tensor=iou_sum - iou_sum_new > 0.5),
        iou_sum_new
    ]
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
    """One iteration of cross-tile suppression for tiled NMS.

    tf.while_loop body: zeroes out every box in `box_slice` that overlaps any
    box of the `inner_idx`-th (earlier, higher-scored) tile above
    `iou_threshold`.

    Args:
      boxes: [batch, anchors, 4] all (padded) boxes.
      box_slice: [batch, NMS_TILE_SIZE, 4] the tile currently being filtered.
      iou_threshold: scalar IoU suppression threshold.
      inner_idx: index of the suppressing tile.

    Returns:
      (boxes, filtered box_slice, iou_threshold, inner_idx + 1) loop vars.
    """
    batch_size = tf.shape(input=boxes)[0]
    new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
                         [batch_size, NMS_TILE_SIZE, 4])
    iou = bbox_overlap(new_slice, box_slice)
    # A box survives only if it clears the threshold against every box of the
    # suppressing tile; suppressed boxes are multiplied to all-zero "dots".
    ret_slice = tf.expand_dims(
        tf.cast(
            tf.reduce_all(input_tensor=iou < iou_threshold, axis=[1]),
            box_slice.dtype), 2) * box_slice
    return boxes, ret_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
    """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

    Args:
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      iou_threshold: a float representing the threshold for deciding whether
        boxes overlap too much with respect to IOU.
      output_size: an int32 tensor of size [batch_size]. Representing the
        number of selected boxes for each batch.
      idx: an integer scalar representing induction variable.

    Returns:
      boxes: updated boxes.
      iou_threshold: pass down iou_threshold to the next iteration.
      output_size: the updated output_size.
      idx: the updated induction variable.
    """
    num_tiles = tf.shape(input=boxes)[1] // NMS_TILE_SIZE
    batch_size = tf.shape(input=boxes)[0]

    # Iterates over tiles that can possibly suppress the current tile.
    box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                         [batch_size, NMS_TILE_SIZE, 4])

    _, box_slice, _, _ = tf.while_loop(
        cond=lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
        body=_cross_suppression,
        loop_vars=[boxes, box_slice, iou_threshold,
                   tf.constant(0)])

    # Iterates over the current tile to compute self-suppression.
    iou = bbox_overlap(box_slice, box_slice)
    # Strict upper-triangular mask: a box may only be suppressed by an
    # earlier (higher-scored) box within the tile.
    mask = tf.expand_dims(
        tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
            tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
    iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)

    suppressed_iou, _, _ = tf.while_loop(
        cond=lambda _iou, loop_condition, _iou_sum: loop_condition,
        body=_self_suppression,
        loop_vars=[iou, tf.constant(True),
                   tf.reduce_sum(input_tensor=iou, axis=[1, 2])])

    # A box with any surviving incoming IoU mass is suppressed; zero it out.
    suppressed_box = tf.reduce_sum(input_tensor=suppressed_iou, axis=1) > 0
    box_slice *= tf.expand_dims(
        1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)

    # Uses box_slice to update the input boxes: write the processed tile back
    # into position `idx` and keep all other tiles untouched.
    mask = tf.reshape(
        tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype),
        [1, -1, 1, 1])
    boxes = tf.tile(tf.expand_dims(box_slice, [1]),
                    [1, num_tiles, 1, 1]) * mask + tf.reshape(
                        boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (
                            1 - mask)
    boxes = tf.reshape(boxes, [batch_size, -1, 4])

    # Updates output_size with the count of boxes still alive in this tile.
    output_size += tf.reduce_sum(
        input_tensor=tf.cast(
            tf.reduce_any(input_tensor=box_slice > 0, axis=[2]), tf.int32),
        axis=[1])
    return boxes, iou_threshold, output_size, idx + 1
def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
                                      iou_threshold):
    """A wrapper that handles non-maximum suppression.

    Assumption:
      * The boxes are sorted by scores unless the box is a dot (all
        coordinates are zero).
      * Boxes with higher scores can be used to suppress boxes with lower
        scores.

    The overall design of the algorithm is to handle boxes tile-by-tile:

    boxes = boxes.pad_to_multiple_of(tile_size)
    num_tiles = len(boxes) // tile_size
    output_boxes = []
    for i in range(num_tiles):
      box_tile = boxes[i*tile_size : (i+1)*tile_size]
      for j in range(i - 1):
        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
        iou = bbox_overlap(box_tile, suppressing_tile)
        # if the box is suppressed in iou, clear it to a dot
        box_tile *= _update_boxes(iou)
      # Iteratively handle the diagonal tile.
      iou = _box_overlap(box_tile, box_tile)
      iou_changed = True
      while iou_changed:
        # boxes that are not suppressed by anything else
        suppressing_boxes = _get_suppressing_boxes(iou)
        # boxes that are suppressed by suppressing_boxes
        suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
        # clear iou to 0 for boxes that are suppressed, as they cannot be
        # used to suppress other boxes any more
        new_iou = _clear_iou(iou, suppressed_boxes)
        iou_changed = (new_iou != iou)
        iou = new_iou
      # remaining boxes that can still suppress others, are selected boxes.
      output_boxes.append(_get_suppressing_boxes(iou))
      if len(output_boxes) >= max_output_size:
        break

    Args:
      scores: a tensor with a shape of [batch_size, anchors].
      boxes: a tensor with a shape of [batch_size, anchors, 4].
      max_output_size: a scalar integer `Tensor` representing the maximum
        number of boxes to be selected by non max suppression.
      iou_threshold: a float representing the threshold for deciding whether
        boxes overlap too much with respect to IOU.

    Returns:
      nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
        dtype as input scores.
      nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
        same dtype as input boxes.
    """
    batch_size = tf.shape(input=boxes)[0]
    num_boxes = tf.shape(input=boxes)[1]

    # Pad the box count up to a multiple of NMS_TILE_SIZE; padded entries are
    # all-zero "dots" that never suppress anything.
    pad = tf.cast(
        tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
        tf.int32) * NMS_TILE_SIZE - num_boxes

    boxes = tf.pad(
        tensor=tf.cast(boxes, tf.float32),
        paddings=[[0, 0], [0, pad], [0, 0]])
    scores = tf.pad(
        tensor=tf.cast(scores, tf.float32), paddings=[[0, 0], [0, pad]])
    num_boxes += pad

    def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
        # Stop once every image has max_output_size boxes or tiles run out.
        return tf.logical_and(
            tf.reduce_min(input_tensor=output_size) < max_output_size,
            idx < num_boxes // NMS_TILE_SIZE)

    selected_boxes, _, output_size, _ = tf.while_loop(
        cond=_loop_cond,
        body=_suppression_loop_body,
        loop_vars=[
            boxes, iou_threshold,
            tf.zeros([batch_size], tf.int32),
            tf.constant(0)
        ])

    # Surviving boxes are non-zero; ranking the survival indicator weighted by
    # reverse position yields the indices of the first max_output_size
    # survivors in original (score) order.
    idx = num_boxes - tf.cast(
        tf.nn.top_k(
            tf.cast(
                tf.reduce_any(input_tensor=selected_boxes > 0, axis=[2]),
                tf.int32) * tf.expand_dims(tf.range(num_boxes, 0, -1), 0),
            max_output_size)[0], tf.int32)

    idx = tf.minimum(idx, num_boxes - 1)
    # Offset per-image indices into the flattened batch for a single gather.
    idx = tf.reshape(
        idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])

    boxes = tf.reshape(
        tf.gather(tf.reshape(boxes, [-1, 4]), idx),
        [batch_size, max_output_size, 4])
    # Zero out slots beyond each image's actual selection count.
    boxes = boxes * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
            output_size, [-1, 1, 1]), boxes.dtype)

    scores = tf.reshape(
        tf.gather(tf.reshape(scores, [-1, 1]), idx),
        [batch_size, max_output_size])
    scores = scores * tf.cast(
        tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
            output_size, [-1, 1]), scores.dtype)

    return scores, boxes
def encode_boxes(boxes, anchors, weights=None):
    """Encode boxes into regression targets relative to their anchors.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      anchors: a tensor whose shape is the same as `boxes` representing the
        coordinates of anchors in ymin, xmin, ymax, xmax order.
      weights: None or a list of four float numbers used to scale coordinates.

    Returns:
      encoded_boxes: a tensor whose shape is the same as `boxes` representing
        the encoded box targets.
    """
    with tf.name_scope('encode_box'):
        boxes = tf.cast(boxes, dtype=anchors.dtype)

        # Box sizes and centers. The +1.0 treats coordinates as inclusive
        # pixel indices.
        ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
        heights = ymax - ymin + 1.0
        widths = xmax - xmin + 1.0
        centers_y = ymin + 0.5 * heights
        centers_x = xmin + 0.5 * widths

        # Anchor sizes and centers, same convention.
        a_ymin, a_xmin, a_ymax, a_xmax = tf.split(anchors, 4, axis=-1)
        a_heights = a_ymax - a_ymin + 1.0
        a_widths = a_xmax - a_xmin + 1.0
        a_centers_y = a_ymin + 0.5 * a_heights
        a_centers_x = a_xmin + 0.5 * a_widths

        # Standard (dy, dx, dh, dw) box parameterization.
        encoded_dy = (centers_y - a_centers_y) / a_heights
        encoded_dx = (centers_x - a_centers_x) / a_widths
        encoded_dh = tf.math.log(heights / a_heights)
        encoded_dw = tf.math.log(widths / a_widths)

        if weights:
            encoded_dy *= weights[0]
            encoded_dx *= weights[1]
            encoded_dh *= weights[2]
            encoded_dw *= weights[3]

        encoded_boxes = tf.concat(
            [encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1)
    return encoded_boxes
def decode_boxes(encoded_boxes, anchors, weights=None):
    """Decode (dy, dx, dh, dw) regression targets back into boxes.

    Args:
      encoded_boxes: a tensor whose last dimension is 4 representing the
        coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
      anchors: a tensor whose shape is the same as `boxes` representing the
        coordinates of anchors in ymin, xmin, ymax, xmax order.
      weights: None or a list of four float numbers used to scale coordinates.

    Returns:
      decoded_boxes: a tensor whose shape is the same as `boxes` representing
        the decoded box targets.
    """
    with tf.name_scope('decode_box'):
        encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
        dy, dx, dh, dw = tf.split(encoded_boxes, 4, axis=-1)
        if weights:
            dy /= weights[0]
            dx /= weights[1]
            dh /= weights[2]
            dw /= weights[3]
        # Clamp the size deltas so tf.exp below cannot blow up.
        dh = tf.minimum(dh, BBOX_XFORM_CLIP)
        dw = tf.minimum(dw, BBOX_XFORM_CLIP)

        # Anchor sizes and centers (+1.0: inclusive pixel-index convention).
        a_ymin, a_xmin, a_ymax, a_xmax = tf.split(anchors, 4, axis=-1)
        a_h = a_ymax - a_ymin + 1.0
        a_w = a_xmax - a_xmin + 1.0
        a_cy = a_ymin + 0.5 * a_h
        a_cx = a_xmin + 0.5 * a_w

        # Recover decoded center and size.
        cy = dy * a_h + a_cy
        cx = dx * a_w + a_cx
        h = tf.exp(dh) * a_h
        w = tf.exp(dw) * a_w

        # Convert back to corner coordinates (-1.0 mirrors the +1.0 above).
        out_ymin = cy - 0.5 * h
        out_xmin = cx - 0.5 * w
        out_ymax = out_ymin + h - 1.0
        out_xmax = out_xmin + w - 1.0
        decoded_boxes = tf.concat(
            [out_ymin, out_xmin, out_ymax, out_xmax], axis=-1)
    return decoded_boxes
def clip_boxes(boxes, height, width):
    """Clip boxes so they lie inside an image of the given size.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      height: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the height of the image.
      width: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the width of the image.

    Returns:
      clipped_boxes: a tensor whose shape is the same as `boxes` representing
        the clipped boxes.
    """
    with tf.name_scope('clip_box'):
        ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)

        def _clamp(coord, upper):
            # min-then-max: coordinates end up in [0, upper - 1].
            return tf.maximum(tf.minimum(coord, upper - 1.0), 0.0)

        clipped_boxes = tf.concat(
            [
                _clamp(ymin, height),
                _clamp(xmin, width),
                _clamp(ymax, height),
                _clamp(xmax, width),
            ],
            axis=-1)
    return clipped_boxes
def filter_boxes(boxes, scores, min_size, height, width, scale):
    """Filter out boxes that are too small.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      scores: a tensor such as all but the last dimensions are the same as
        `boxes`. The last dimension is 1. It represents the scores.
      min_size: an integer specifying the minimal size.
      height: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the height of the image.
      width: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the width of the image.
      scale: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the scale of the image.

    Returns:
      filtered_boxes: a tensor whose shape is the same as `boxes` representing
        the filtered boxes.
      filtered_scores: a tensor whose shape is the same as `scores`
        representing the filtered scores.
    """
    with tf.name_scope('filter_box'):
        ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
        box_h = ymax - ymin + 1.0
        box_w = xmax - xmin + 1.0
        center_y = ymin + box_h / 2.0
        center_x = xmin + box_w / 2.0

        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        scale = tf.cast(scale, dtype=boxes.dtype)
        # min_size is floored at 1 before scaling.
        min_size = tf.cast(tf.maximum(min_size, 1), dtype=boxes.dtype)

        # Keep boxes at least (min_size * scale) on both sides ...
        big_enough = tf.logical_and(
            tf.greater_equal(box_h, min_size * scale),
            tf.greater_equal(box_w, min_size * scale))
        # ... whose center lies inside the image.
        center_inside = tf.logical_and(
            tf.less(center_y, height), tf.less(center_x, width))
        keep = tf.logical_and(big_enough, center_inside)

        # Rejected entries are zeroed out rather than removed, preserving shape.
        filtered_scores = tf.where(keep, scores, tf.zeros_like(scores))
        filtered_boxes = tf.cast(keep, dtype=boxes.dtype) * boxes
    return filtered_boxes, filtered_scores
def to_normalized_coordinates(boxes, height, width):
    """Convert absolute box coordinates to normalized ([0, 1]-range) ones.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      height: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the height of the image.
      width: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the width of the image.

    Returns:
      normalized_boxes: a tensor whose shape is the same as `boxes`
        representing the boxes in normalized coordinates.
    """
    with tf.name_scope('normalize_box'):
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
        # y coordinates divide by height, x coordinates by width.
        normalized_boxes = tf.concat(
            [ymin / height, xmin / width, ymax / height, xmax / width],
            axis=-1)
    return normalized_boxes
def to_absolute_coordinates(boxes, height, width):
    """Convert normalized box coordinates to absolute (pixel) ones.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      height: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the height of the image.
      width: an integer, a scalar or a tensor such as all but the last
        dimensions are the same as `boxes`. The last dimension is 1. It
        represents the width of the image.

    Returns:
      absolute_boxes: a tensor whose shape is the same as `boxes` representing
        the boxes in absolute coordinates.
    """
    with tf.name_scope('denormalize_box'):
        height = tf.cast(height, dtype=boxes.dtype)
        width = tf.cast(width, dtype=boxes.dtype)
        ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
        # y coordinates scale by height, x coordinates by width.
        absolute_boxes = tf.concat(
            [ymin * height, xmin * width, ymax * height, xmax * width],
            axis=-1)
    return absolute_boxes
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/coco_utils.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions to manipulate masks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
pycocotools.mask
as
coco_mask
# Sentinel values used when variable-length polygon annotations are flattened
# into a single padded 1-D array. PAD entries are stripped in
# extract_coco_groundtruth; the separators are consumed by _np_array_split /
# _unflat_polygons to recover the [#obj, #polygon, #vertex] nesting.
POLYGON_PAD_VALUE = -3   # padding appended to reach a fixed array length
POLYGON_SEPARATOR = -1   # separates polygons belonging to the same object
MASK_SEPARATOR = -2      # separates one object's polygons from the next
def
_np_array_split
(
a
,
v
):
"""Split numpy array by separator value.
Args:
a: 1-D numpy.array.
v: number. Separator value. e.g -1.
Returns:
2-D list of clean separated arrays.
Example:
a = [1, 2, 3, 4, -1, 5, 6, 7, 8]
b = _np_array_split(a, -1)
# Output: b = [[1, 2, 3, 4], [5, 6, 7, 8]]
"""
a
=
np
.
array
(
a
)
arrs
=
np
.
split
(
a
,
np
.
where
(
a
[:]
==
v
)[
0
])
return
[
e
if
(
len
(
e
)
<=
0
or
e
[
0
]
!=
v
)
else
e
[
1
:]
for
e
in
arrs
]
def _unflat_polygons(x):
    """Unflats/recovers a 1-d padded polygon array into a 3-d polygon list.

    Args:
      x: numpy.array. shape [num_elements, 1], num_elements = num_obj *
        num_vertex + padding.

    Returns:
      A list of three dimensions: [#obj, #polygon, #vertex]
    """
    # First split objects apart, then split each object into its polygons.
    per_object = _np_array_split(x, MASK_SEPARATOR)
    return [
        [polygon.tolist()
         for polygon in _np_array_split(obj, POLYGON_SEPARATOR)]
        for obj in per_object
    ]  # pylint: disable=g-complex-comprehension
def
_denormalize_to_coco_bbox
(
bbox
,
height
,
width
):
"""Denormalize bounding box.
Args:
bbox: numpy.array[float]. Normalized bounding box. Format: ['ymin', 'xmin',
'ymax', 'xmax'].
height: int. image height.
width: int. image width.
Returns:
[x, y, width, height]
"""
y1
,
x1
,
y2
,
x2
=
bbox
y1
*=
height
x1
*=
width
y2
*=
height
x2
*=
width
box_height
=
y2
-
y1
box_width
=
x2
-
x1
return
[
float
(
x1
),
float
(
y1
),
float
(
box_width
),
float
(
box_height
)]
def
_extract_image_info
(
prediction
,
b
):
return
{
'id'
:
int
(
prediction
[
'source_id'
][
b
]),
'width'
:
int
(
prediction
[
'width'
][
b
]),
'height'
:
int
(
prediction
[
'height'
][
b
]),
}
def _extract_bbox_annotation(prediction, b, obj_i):
    """Constructs COCO format bounding box annotation."""
    height = prediction['height'][b]
    width = prediction['width'][b]
    bbox = _denormalize_to_coco_bbox(
        prediction['groundtruth_boxes'][b][obj_i, :], height, width)

    if 'groundtruth_area' in prediction:
        area = float(prediction['groundtruth_area'][b][obj_i])
    else:
        # Using the box area to replace the polygon area. This value will not
        # affect real evaluation but may fail the unit test.
        area = bbox[2] * bbox[3]

    return {
        'id': b * 1000 + obj_i,  # placeholder of annotation id.
        'image_id': int(prediction['source_id'][b]),  # source_id
        'category_id': int(prediction['groundtruth_classes'][b][obj_i]),
        'bbox': bbox,
        'iscrowd': int(prediction['groundtruth_is_crowd'][b][obj_i]),
        'area': area,
        'segmentation': [],
    }
def _extract_polygon_info(prediction, polygons, b, obj_i):
    """Constructs 'area' and 'segmentation' fields.

    Args:
      prediction: dict[str, numpy.array]. Model outputs. The value dimension is
        [batch_size, #objects, #features, ...]
      polygons: list[list[list]]. Dimensions are [#objects, #polygon, #vertex].
      b: batch index.
      obj_i: object index.

    Returns:
      dict[str, numpy.array]. COCO format annotation with 'area' and
      'segmentation'.
    """
    annotation = {}
    if 'groundtruth_area' in prediction:
        groundtruth_area = float(prediction['groundtruth_area'][b][obj_i])
    else:
        # No precomputed area: derive it from the polygons via pycocotools.
        height = prediction['height'][b]
        width = prediction['width'][b]
        rles = coco_mask.frPyObjects(polygons[obj_i], height, width)
        groundtruth_area = coco_mask.area(rles)
    annotation['area'] = groundtruth_area
    annotation['segmentation'] = polygons[obj_i]

    if not annotation['segmentation'][0]:
        # Adds a dummy polygon in case there is no segmentation.
        # Note that this could affect eval number in a very tiny amount since
        # for the instance without masks, it creates a fake single pixel mask
        # in the center of the box.
        height = prediction['height'][b]
        width = prediction['width'][b]
        bbox = _denormalize_to_coco_bbox(
            prediction['groundtruth_boxes'][b][obj_i, :], height, width)
        xcenter = bbox[0] + bbox[2] / 2.0
        ycenter = bbox[1] + bbox[3] / 2.0
        # Degenerate 4-vertex polygon collapsed onto the box center.
        annotation['segmentation'] = [[xcenter, ycenter] * 4]
    return annotation
def
_extract_categories
(
annotations
):
"""Extract categories from annotations."""
categories
=
{}
for
anno
in
annotations
:
category_id
=
int
(
anno
[
'category_id'
])
categories
[
category_id
]
=
{
'id'
:
category_id
}
return
list
(
categories
.
values
())
def extract_coco_groundtruth(prediction, include_mask=False):
    """Extract COCO format groundtruth.

    Args:
      prediction: dictionary of batch of prediction result. the first dimension
        each element is the batch.
      include_mask: True for including masks in the output annotations.

    Returns:
      Tuple of (images, annotations).
      images: list[dict]. Required keys: 'id', 'width' and 'height'. The values
        are image id, width and height.
      annotations: list[dict]. Required keys: {'id', 'source_id',
        'category_id', 'bbox', 'iscrowd'} when include_mask=False. If
        include_mask=True, also required {'area', 'segmentation'}. The 'id'
        value is the annotation id and can be any **positive** number (>=1).
        Refer to http://cocodataset.org/#format-data for more details.

    Raises:
      ValueError: If any groundtruth fields is missing.
    """
    required_fields = [
        'source_id', 'width', 'height', 'num_groundtruth_labels',
        'groundtruth_boxes', 'groundtruth_classes'
    ]
    if include_mask:
        required_fields += ['groundtruth_polygons', 'groundtruth_area']
    for key in required_fields:
        if key not in prediction.keys():
            raise ValueError('Missing groundtruth field: "{}" keys: {}'.format(
                key, prediction.keys()))

    images = []
    annotations = []
    for b in range(prediction['source_id'].shape[0]):
        # Constructs image info.
        images.append(_extract_image_info(prediction, b))

        polygons = None
        if include_mask:
            # Drop padding entries, then recover the nested polygon lists.
            flat_padded = prediction['groundtruth_polygons'][b]
            flat = np.delete(
                flat_padded,
                np.where(flat_padded[:] == POLYGON_PAD_VALUE)[0])
            polygons = _unflat_polygons(flat)

        # Constructs annotations.
        for obj_i in range(prediction['num_groundtruth_labels'][b]):
            annotation = _extract_bbox_annotation(prediction, b, obj_i)
            if include_mask:
                annotation.update(
                    _extract_polygon_info(prediction, polygons, b, obj_i))
            annotations.append(annotation)
    return images, annotations
def create_coco_format_dataset(images,
                               annotations,
                               regenerate_annotation_id=True):
    """Creates COCO format dataset with COCO format images and annotations."""
    if regenerate_annotation_id:
        # WARNING: The annotation id must be positive.
        for new_id, anno in enumerate(annotations, start=1):
            anno['id'] = new_id
    return {
        'images': images,
        'annotations': annotations,
        'categories': _extract_categories(annotations),
    }
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/decorators.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
atexit
import
functools
import
inspect
import
signal
import
wrapt
__all__ = ["atexit_hook"]

# Module-level registries guarding against double registration/execution of
# exit callbacks (see register_atexit_fn / atexit_hook below).
_executed_exit_fns = set()      # exit functions that have already been run
_registered_exit_fns = set()    # functions already registered for exit
_registered_objects = set()     # hex ids of objects whose __atexit__ is hooked
def register_atexit_fn(fun=None, signals=None,
                       logfun=lambda s: print(s, file=sys.stderr)):
    """Register a function which will be executed on "normal"
    interpreter exit or in case one of the `signals` is received
    by this process (differently from atexit.register()).

    Also, it makes sure to execute any other function which was
    previously registered via signal.signal(). If any, it will be
    executed after our own `fun`.

    Functions which were already registered or executed via this
    function will be ignored.

    Note: there's no way to escape SIGKILL, SIGSTOP or os._exit(0)
    so don't bother trying.

    You can use this either as a function or as a decorator:

        @register_atexit_fn
        def cleanup():
            pass

        # ...or
        register_atexit_fn(cleanup)

    Note about Windows: I tested this some time ago and didn't work
    exactly the same as on UNIX, then I didn't care about it
    anymore and didn't test since then so may not work on Windows.

    Parameters:
    - fun: a callable
    - signals: a list of signals for which this function will be
      executed (default SIGTERM)
    - logfun: a logging function which is called when a signal is
      received. Default: print to standard error. May be set to
      None if no logging is desired.
    """
    # Default action per POSIX signal:
    # https://github.com/torvalds/linux/blob/master/include/linux/signal.h
    if signals is None:
        signals = [signal.SIGTERM]

    def stringify_sig(signum):
        # Python < 3.5 delivers plain ints; map them back to 'SIG*' names
        # for readable log messages.
        if sys.version_info < (3, 5):
            smap = dict([(getattr(signal, x), x) for x in dir(signal)
                         if x.startswith('SIG')])
            return smap.get(signum, signum)
        else:
            return signum

    def fun_wrapper():
        # Run `fun` at most once, even if reached via several paths
        # (atexit and/or multiple signals).
        if fun not in _executed_exit_fns:
            try:
                fun()
            finally:
                _executed_exit_fns.add(fun)

    def signal_wrapper(signum=None, frame=None):
        if signum is not None:
            if logfun is not None:
                logfun("signal {} received by process with PID {}".format(
                    stringify_sig(signum), os.getpid()))
        fun_wrapper()
        # Only return the original signal this process was hit with
        # in case fun returns with no errors, otherwise process will
        # return with sig 1.
        if signum is not None:
            if signum == signal.SIGINT:
                raise KeyboardInterrupt
            # XXX - should we do the same for SIGTERM / SystemExit?
            sys.exit(signum)

    def register_fun(fun, signals):
        if not callable(fun):
            raise TypeError("{!r} is not callable".format(fun))
        set([fun])  # raise exc if obj is not hash-able
        signals = set(signals)
        for sig in signals:
            # Register function for this signal and pop() the previously
            # registered one (if any). This can either be a callable,
            # SIG_IGN (ignore signal) or SIG_DFL (perform default action
            # for signal).
            old_handler = signal.signal(sig, signal_wrapper)
            if old_handler not in (signal.SIG_DFL, signal.SIG_IGN):
                # ...just for extra safety.
                if not callable(old_handler):
                    continue
                # This is needed otherwise we'll get a KeyboardInterrupt
                # strace on interpreter exit, even if the process exited
                # with sig 0.
                if (sig == signal.SIGINT and
                        old_handler is signal.default_int_handler):
                    continue
                # There was a function which was already registered for this
                # signal. Register it again so it will get executed (after
                # our new fun).
                if old_handler not in _registered_exit_fns:
                    atexit.register(old_handler)
                    _registered_exit_fns.add(old_handler)
        # This further registration will be executed in case of clean
        # interpreter exit (no signals received).
        if fun not in _registered_exit_fns or not signals:
            atexit.register(fun_wrapper)
            _registered_exit_fns.add(fun)

    # This piece of machinery handles 3 usage cases. register_atexit_fn()
    # used as:
    # - a function
    # - a decorator without parentheses
    # - a decorator with parentheses
    if fun is None:
        # BUG FIX: the original code decorated `outer` with a bare
        # `@functools.wraps` (no argument), which replaces `outer` with
        # functools.partial(update_wrapper, wrapped=outer). Applying that as
        # a decorator only copies metadata and never calls register_fun, so
        # `@register_atexit_fn(signals=...)` silently registered nothing.
        # Moreover, `fun_wrapper`/`signal_wrapper` above close over the
        # *outer* `fun`, which is None on this path. Re-entering with the
        # decorated callable bound fixes both problems.
        def outer(decorated_fun):
            return register_atexit_fn(decorated_fun, signals, logfun)
        return outer
    else:
        register_fun(fun, signals)
        return fun
def atexit_hook(*args, **kwargs):
    """Decorator hooking an object's `__atexit__` method into process exit.

    The decorated callable is invoked normally; its result's `__atexit__`
    bound method is registered (once per object id) to run on SIGTERM,
    SIGINT or normal interpreter exit.
    """
    @wrapt.decorator
    def wrapper(wrapped, instance, args, kwargs):
        if not hasattr(wrapped, "__atexit__"):
            raise AttributeError(
                "The class `%s` does not have an `__atexit__` method" %
                wrapped.__name__)

        # Whether `wrapped` is a class, plain function, staticmethod,
        # classmethod or instancemethod, the call forwarding is identical.
        _impl = wrapped(*args, **kwargs)

        object_id = hex(id(_impl))
        if object_id not in _registered_objects:
            register_atexit_fn(
                fun=_impl.__atexit__,
                signals=[signal.SIGTERM, signal.SIGINT])
            _registered_objects.add(object_id)
        return _impl

    return wrapper(*args, **kwargs)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/distributed_utils.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
# Public helpers for querying the MPI / SLURM distributed runtime environment.
__all__ = [
    "MPI_local_rank",
    "MPI_rank",
    "MPI_size",
    "MPI_rank_and_size",
    "MPI_is_distributed"
]
def MPI_is_distributed():
    """Return a boolean whether a distributed training/inference runtime is
    being used.

    Detection is based on environment variables set by Open MPI or SLURM.

    :return: bool
    """
    ompi_vars = ("OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE")
    slurm_vars = ("SLURM_PROCID", "SLURM_NTASKS")
    return (all(v in os.environ for v in ompi_vars)
            or all(v in os.environ for v in slurm_vars))
def MPI_local_rank():
    """Return the node-local rank, preferring Open MPI over SLURM variables;
    0 when neither is set."""
    for var in ("OMPI_COMM_WORLD_LOCAL_RANK", "SLURM_LOCALID"):
        if var in os.environ:
            return int(os.environ[var])
    return 0
def MPI_rank():
    """Return this process's rank (first element of MPI_rank_and_size)."""
    rank, _ = MPI_rank_and_size()
    return rank
def MPI_size():
    """Return the world size (second element of MPI_rank_and_size)."""
    _, world_size = MPI_rank_and_size()
    return world_size
def MPI_rank_and_size():
    """Return (rank, size) from the environment.

    Defaults to (0, 1); the environment is only consulted once "tensorflow"
    has been imported somewhere in the process (presumably to avoid paying
    the env scan in non-TF tooling — behavior kept as-is).
    """
    if "tensorflow" not in sys.modules:
        return 0, 1
    return mpi_env_MPI_rank_and_size()
# Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
def mpi_env_MPI_rank_and_size():
    """Get MPI rank and size from environment variables and return them as a
    tuple of integers.

    Most MPI implementations have an `mpirun` or `mpiexec` command that will
    run an MPI executable and set up all communication necessary between the
    different processors. As part of that set up, they will set environment
    variables that contain the rank and size of the MPI_COMM_WORLD
    communicator.

    Since MPI is just a standard, not an implementation, implementations
    typically choose their own environment variable names; several common
    ones are probed below.

    If this is not running under MPI, then defaults of rank zero and size one
    are returned.

    Source: https://github.com/horovod/horovod/blob/c3626e/test/common.py#L25
    """
    env_pairs = (
        ("PMI_RANK", "PMI_SIZE"),                            # MPICH-style
        ("SLURM_PROCID", "SLURM_NTASKS"),                    # SLURM
        ("OMPI_COMM_WORLD_RANK", "OMPI_COMM_WORLD_SIZE"),    # Open MPI
    )
    for rank_var, size_var in env_pairs:
        rank = os.environ.get(rank_var)
        size = os.environ.get(size_var)
        if rank is not None and size is not None:
            return int(rank), int(size)
    # Default to rank zero and size one if there are no environment variables
    return 0, 1
\ No newline at end of file
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/lazy_imports.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2006-2011, NIPY Developers
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * Neither the name of the NIPY Developers nor the names of any
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Source: https://github.com/nipy/nitime/blob/c8eb314/nitime/lazyimports.py
"""This module provides lazy import functionality to improve the import
performance of nitime. For example, some parts of nitime leverage and import
matplotlib, which is quite a big package, yet most of the nitime code does not
depend on matplotlib. By lazily-loading a module, we defer the overhead of
importing it until the first time it is actually used, thereby speeding up
nitime imports.
A generic :class:`LazyImport` class is implemented which takes the module name
as a parameter, and acts as a proxy for that module, importing it only when
the module is used, but effectively acting as the module in every other way
(including inside IPython with respect to introspection and tab completion)
with the *exception* of reload() - reloading a :class:`LazyImport` raises an
:class:`ImportError`.
Commonly used nitime lazy imports are also defined in :mod:`nitime.lazy`, so
they can be reused throughout nitime.
"""
import
os
import
sys
import
types
class LazyImport(types.ModuleType):
    """
    This class takes the module name as a parameter, and acts as a proxy for
    that module, importing it only when the module is used, but effectively
    acting as the module in every other way (including inside IPython with
    respect to introspection and tab completion) with the *exception* of
    reload()- reloading a :class:`LazyImport` raises an :class:`ImportError`.

    >>> mlab = LazyImport('matplotlib.mlab')

    No import happens on the above line, until we do something like call an
    ``mlab`` method or try to do tab completion or introspection on ``mlab``
    in IPython.

    >>> mlab
    <module 'matplotlib.mlab' will be lazily loaded>

    Now the :class:`LazyImport` will do an actual import, and call the dist
    function of the imported module.

    >>> mlab.dist(1969,2011)
    42.0
    """

    def __getattribute__(self, x):
        # This method will be called only once, since we'll change
        # self.__class__ to LoadedLazyImport, and __getattribute__ will point
        # to module.__getattribute__
        name = object.__getattribute__(self, '__name__')
        __import__(name)
        # if name above is 'package.foo.bar', package is returned, the docs
        # recommend that in order to get back the full thing, that we import
        # and then lookup the full name is sys.modules, see:
        # http://docs.python.org/library/functions.html#__import__
        module = sys.modules[name]

        # Now that we've done the import, cutout the middleman and make self
        # act as the imported module
        class LoadedLazyImport(types.ModuleType):
            # Delegate everything straight to the real module from now on.
            __getattribute__ = module.__getattribute__
            __repr__ = module.__repr__

        object.__setattr__(self, '__class__', LoadedLazyImport)
        # The next line will make "reload(l)" a silent no-op
        return module.__getattribute__(x)

    def __repr__(self):
        # Shown only before the first attribute access triggers the import.
        return "<module '%s' will be lazily loaded>" % \
            object.__getattribute__(self, '__name__')
# Sphinx (running on ReadTheDocs) cannot introspect the lazy proxy above, so
# when building documentation LazyImport is replaced by an eager, plain-object
# implementation that imports immediately at construction time.
if 'READTHEDOCS' in os.environ:
    lazy_doc = """
    WARNING: To get Sphinx documentation to build we disable
    LazyImports, which makes Sphinx incorrectly report this
    class as having a base class of object. In reality,
    :class:`LazyImport`'s base class is
    :class:`types.ModuleType`.
    """
    # Keep the original class docstring appended after the warning.
    lazy_doc += LazyImport.__doc__

    class LazyImport(object):
        __doc__ = lazy_doc

        def __init__(self, x):
            # Eager import: performed immediately, not deferred.
            __import__(x)
            self.module = sys.modules[x]

        def __getattr__(self, x):
            # Delegate every attribute lookup to the wrapped module.
            return self.module.__getattribute__(x)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/logging_backend.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
inspect
import
operator
import
six
import
subprocess
import
time
from
enum
import
Enum
from
mask_rcnn.utils.logging_formatter
import
logging
from
mask_rcnn.utils.decorators
import
atexit_hook
from
mask_rcnn.utils.metaclasses
import
SingletonMetaClass
from
mask_rcnn.utils.meters
import
ACCEPTED_INT_NUMBER_FORMATS
from
mask_rcnn.utils.meters
import
ACCEPTED_FLOAT_NUMBER_FORMATS
import
dllogger
from
dllogger
import
Verbosity
# Public API of this module.
__all__ = ["LoggingBackend", "LoggingScope", "DistributedStrategy", "RuntimeMode"]
class
_BaseEnum
(
Enum
):
@
classmethod
def
__values__
(
cls
):
return
[
getattr
(
cls
,
m
.
name
)
for
m
in
cls
]
class LoggingScope(_BaseEnum):
    """Granularity at which metrics are logged."""
    ITER = 'Iteration'
    # NOTE(review): the value 'AllReduce' looks like a copy/paste slip (the
    # sibling member uses a scope word, and 'AllReduce' also appears in
    # DistributedStrategy below). Left unchanged because callers may compare
    # against the literal value — confirm before fixing.
    EPOCH = 'AllReduce'
class DistributedStrategy(_BaseEnum):
    """How values are combined across distributed workers."""
    # NOTE(review): REDUCE_SUM = 'AllGather' and REDUCE_MEAN = 'AllReduce'
    # read like mismatched labels (a sum is usually an all-reduce, a gather is
    # not a reduction). Values left untouched — verify against the consumers
    # of `.value` before changing.
    REDUCE_SUM = 'AllGather'
    REDUCE_MEAN = 'AllReduce'
    NONE = None
class RuntimeMode(_BaseEnum):
    """Execution phases accepted by `validate_runtime_mode`."""
    TRAIN = 'train'
    INFERENCE = 'inference'
    VALIDATION = 'validation'
    TEST = 'test'
def validate_runtime_mode(requested_mode):
    """Ensure `requested_mode` is one of the declared `RuntimeMode` members.

    Args:
        requested_mode: value to validate; expected to be a member of
            :class:`RuntimeMode` (e.g. ``RuntimeMode.TRAIN``).

    Raises:
        ValueError: if `requested_mode` does not match any `RuntimeMode`
            member.
    """
    # Iterating the Enum class yields only its declared members. The previous
    # `inspect.getmembers` approach also collected Enum internals such as
    # `_member_names_` and `_member_map_` (single-underscore names escape the
    # dunder filter), polluting both the comparison set and the error message.
    authorized_modes = list(RuntimeMode)
    if requested_mode in authorized_modes:
        return
    raise ValueError(
        "Unknown requested mode: `%s` - Authorized: %s"
        % (requested_mode, [mode.name for mode in authorized_modes])
    )
@atexit_hook
@six.add_metaclass(SingletonMetaClass)
class LoggingBackend(object):
    """Singleton console/dllogger logging facade for training and evaluation.

    `@atexit_hook` presumably arranges for `__atexit__` to run at interpreter
    shutdown (defined in mask_rcnn.utils.decorators — TODO confirm).
    """

    # Target total width of the "=== header ===" separators built below.
    SEP_TARGET_LENGTH = 50

    # ================= Logging Methods ================= #

    # Prefix prepended to every message emitted through log_* helpers.
    LOGGING_PREFIX = ""

    def __init__(self):
        # super(LoggingBackend, self).__init__()
        # Tracks whether the "Start ..." banner was already printed once,
        # so a second call prints "Restart ..." instead.
        self.runtime_initialized = {"train": False, "evaluation": False}

    # ================= Constructor/Destructor Methods ================= #

    def __atexit__(self):
        # `sys.last_traceback` is only set after an uncaught exception, so its
        # absence is used as the success signal.
        is_success = not (hasattr(sys, "last_traceback") and sys.last_traceback is not None)
        print()  # Visual spacing
        if is_success:
            self.log_info("Job finished with status: `SUCCESS`")
        else:
            logging.error("Job finished with an uncaught exception: `FAILURE`")

    def log_debug(self, message):
        """Log `message` at DEBUG level with the backend prefix."""
        logging.debug("%s%s" % (self.LOGGING_PREFIX, message))

    def log_info(self, message):
        """Log `message` at INFO level with the backend prefix."""
        logging.info("%s%s" % (self.LOGGING_PREFIX, message))

    def log_warning(self, message):
        """Log `message` at WARNING level with the backend prefix."""
        logging.warning("%s%s" % (self.LOGGING_PREFIX, message))

    def log_error(self, message):
        """Log `message` at ERROR level with the backend prefix."""
        logging.error("%s%s" % (self.LOGGING_PREFIX, message))

    def log_critical(self, message):
        """Log `message` at CRITICAL level with the backend prefix."""
        logging.critical("%s%s" % (self.LOGGING_PREFIX, message))

    # ================= Automated Logging Methods ================= #

    @staticmethod
    def format_metric_value(value):
        """Render a metric value for display.

        Floats outside [1e-4, 1e4] use scientific notation, others are
        rounded to 5 decimals; ints print as-is; everything else is returned
        unchanged. Note: any negative float is < 1e-4 and therefore rendered
        in scientific notation.
        """
        if isinstance(value, ACCEPTED_FLOAT_NUMBER_FORMATS):
            if value < 1e-4 or value > 1e4:
                print_value = "%.4e" % value
            else:
                print_value = "{}".format(round(value, 5))
        elif isinstance(value, ACCEPTED_INT_NUMBER_FORMATS):
            print_value = "%d" % value
        else:
            print_value = value
        return print_value

    # ================= Runtime Logging Method ================= #

    def log_runtime(self, is_train=False):
        """Print a Start/Restart banner for training or evaluation."""
        if is_train:
            if not self.runtime_initialized["train"]:
                self.runtime_initialized["train"] = True
                _message = " Start Training "
            else:
                _message = " Restart Training "
        else:
            if not self.runtime_initialized["evaluation"]:
                self.runtime_initialized["evaluation"] = True
                _message = " Start Evaluation "
            else:
                _message = " Restart Evaluation "
        print()  # Visual Spacing
        self.log_info("# ============================================= #")
        self.log_info(_message)
        self.log_info("# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #")
        print()  # Visual Spacing

    # ================= Automated Logging Methods ================= #

    def log_git_status(self):
        """Log current git branch, commit and remote URL (best effort)."""
        git_metadata = dict()

        def get_cmd_result(cmd):
            # Shell pipeline output, decoded and stripped of the newline.
            return subprocess.check_output(cmd, shell=True).decode("utf-8").strip()

        try:
            git_metadata["branch_name"] = get_cmd_result("git symbolic-ref -q HEAD | cut -d/ -f3-")  # current branch
            git_metadata["commit_id"] = get_cmd_result("git rev-parse HEAD")  # current commit ID
            git_metadata["remote_url"] = get_cmd_result("git remote get-url origin")  # git origin url
            # Detached HEAD yields an empty branch name; drop it entirely.
            if git_metadata["branch_name"] == "":
                del git_metadata["branch_name"]
        except subprocess.CalledProcessError:
            # Not a git repository
            pass
        # NOTE(review): dead check — `git_metadata` is always a dict here,
        # never None. Kept byte-identical.
        if git_metadata is None:
            raise ValueError("`git_metadata` value received is `None`")
        self.log_info("===================================== GIT REPOSITORY =====================================")
        for key, value in sorted(git_metadata.items(), key=operator.itemgetter(0)):
            self.log_info("%s: %s" % (key.replace("_", " ").upper(), value))
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")

    def log_model_statistics(self, model_statistics=None):
        """Log a dict of model statistics, sorted by key.

        Raises:
            ValueError: if `model_statistics` is None or not a dict.
        """
        if model_statistics is None:
            raise ValueError("`model_statistics` value received is `None`")
        if not isinstance(model_statistics, dict):
            raise ValueError("`model_statistics` should be a `dict`")
        self.log_info("==================================== MODEL STATISTICS ====================================")
        for key, value in sorted(model_statistics.items(), key=operator.itemgetter(0)):
            self.log_info("%s: %s" % (key, value))
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")

    def log_trainable_variables(self, var_list=None):
        """Log an enumerated table of (variable name, shape) pairs.

        Raises:
            ValueError: if `var_list` is None.
        """
        if var_list is None:
            raise ValueError("`var_list` value received is `None`")
        self.log_info("=================================== TRAINABLE VARIABLES ==================================")
        for idx, (var_name, var_shape) in enumerate(var_list):
            self.log_info("[#{idx:04d}] {name:<60s} => {shape}".format(idx=idx + 1, name=var_name, shape=str(var_shape)))
        self.log_info("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n")

    # ================= Step Logging Method ================= #

    def log_step(self, iteration, throughput, gpu_stats):
        """Log one training step: wall-clock timestamp, iteration, throughput.

        NOTE(review): `gpu_stats` is accepted but never used here — confirm
        whether callers still pass meaningful data.
        """
        # print() # Visual Spacing
        self.log_info("timestamp: %s" % time.time())
        self.log_info("iteration: %d" % int(iteration))
        if throughput is not None:
            self.log_info("throughput: %.1f samples/sec" % float(throughput))
        else:
            self.log_info("throughput: None")

    def log_amp_runtime(self, current_loss_scale, steps_non_skipped, steps_since_last_scale):
        """Log AMP loss-scaling statistics; silent when both values are None."""
        header_name = " AMP Statistics "
        # Pad the header with '=' on both sides up to SEP_TARGET_LENGTH.
        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
        if current_loss_scale is not None or steps_since_last_scale is not None:
            self.log_info(
                "%s%s%s"
                % (
                    "=" * reference_len,
                    header_name,
                    "=" * (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
                )
            )
            self.log_info("Steps - Non Skipped: %s" % steps_non_skipped)
            if steps_since_last_scale is not None:
                self.log_info("Steps - Since last loss scale: %s" % steps_since_last_scale)
            if current_loss_scale is not None:
                self.log_info("Loss Scale: %s" % current_loss_scale)

    # ================= Metric Logging Methods ================= #

    def log_metrics(self, metric_data, iteration, runtime_mode):
        """Log a dict of metrics for the given iteration and runtime mode.

        Raises:
            ValueError: if `runtime_mode` is unknown, `metric_data` is not a
                dict, or `iteration` is not an accepted integer type.
        """
        validate_runtime_mode(runtime_mode)
        if not isinstance(metric_data, dict):
            raise ValueError("`metric_data` should be a dictionary. Received: %s" % type(metric_data))
        if not isinstance(iteration, ACCEPTED_INT_NUMBER_FORMATS):
            raise ValueError("`iteration` should be an integer. Received: %s" % type(iteration))
        header_name = " Metrics "
        reference_len = int((LoggingBackend.SEP_TARGET_LENGTH - len(header_name)) / 2)
        self.log_info(
            "%s%s%s"
            % (
                "=" * reference_len,
                header_name,
                "=" * (LoggingBackend.SEP_TARGET_LENGTH - len(header_name) - reference_len)
            )
        )
        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
            print_value = LoggingBackend.format_metric_value(value)
            self.log_info("%s: %s" % (key, print_value))

    def log_final_metrics(self, metric_data, runtime_mode):
        """Log final metrics to the console and forward them to dllogger."""
        validate_runtime_mode(runtime_mode)
        for key, value in sorted(metric_data.items(), key=operator.itemgetter(0)):
            print_value = LoggingBackend.format_metric_value(value)
            self.log_info("%s: %s" % (key, print_value))
        # Forward the whole dict to dllogger once, after the per-key console
        # output above.
        dllogger.log(step=(), data=metric_data, verbosity=Verbosity.DEFAULT)

    # ================= Summary Logging Method ================= #

    def log_summary(self, is_train, total_steps, total_processing_time, avg_throughput):
        """Log an end-of-run performance summary (console + dllogger).

        Args:
            is_train: selects the Training vs Evaluation banner.
            total_steps: total number of processed steps.
            total_processing_time: elapsed time in seconds.
            avg_throughput: average samples/sec over the run.
        """
        if is_train:
            _message = " Training Performance Summary "
        else:
            _message = " Evaluation Performance Summary "
        print()  # Visual Spacing
        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
        self.log_info(_message)
        self.log_info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
        total_processing_hours, rem = divmod(total_processing_time, 3600)
        total_processing_minutes, total_processing_seconds = divmod(rem, 60)
        print()  # Visual Spacing
        # Re-assemble the duration from truncated minutes/seconds so dllogger
        # receives the same whole-second value that is printed below.
        total_processing_time = total_processing_hours * 3600 + int(total_processing_minutes) * 60 + int(total_processing_seconds)
        dllogger.log(
            step=(),
            data={
                "Average_throughput": avg_throughput,
                "Total processed steps": int(total_steps),
                "Total_processing_time": total_processing_time
            },
            verbosity=Verbosity.DEFAULT
        )
        self.log_info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
        self.log_info("Total processed steps: {total_steps}".format(total_steps=total_steps))
        self.log_info(
            "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
                hours=total_processing_hours,
                minutes=int(total_processing_minutes),
                seconds=int(total_processing_seconds)
            )
        )
        self.log_info("==================== Metrics ====================")
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/logging_formatter.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
inspect
from
contextlib
import
contextmanager
from
six
import
add_metaclass
import
threading
import
logging
as
_logging
import
warnings
from
mask_rcnn.utils.distributed_utils
import
MPI_rank_and_size
from
mask_rcnn.utils.metaclasses
import
SingletonMetaClass
# Public API of this module.
__all__ = ["logging", "log_cleaning"]

# Name used both as the logger name and inside the log-line prefix.
MODEL_NAME = "MaskRCNN"
class StdOutFormatter(_logging.Formatter):
    """
    Log formatter used in Tornado. Key features of this formatter are:
    * Color support when logging to a terminal that supports it.
    * Timestamps on every log line.
    * Robust against str/bytes encoding problems.
    """

    # Prefix format: "[MaskRCNN] LEVEL   : message" with optional color codes.
    DEFAULT_FORMAT = '%(color)s[{model_name}] %(levelname)-8s: %(end_color)s%(message)s'.format(model_name=MODEL_NAME)
    DEFAULT_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, fmt=None, datefmt=None, style='%'):
        r"""
        :arg bool color: Enables color support.
        :arg string fmt: Log message format.
        It will be applied to the attributes dict of log records. The
        text between ``%(color)s`` and ``%(end_color)s`` will be colored
        depending on the level if color support is on.
        :arg dict colors: color mappings from logging level to terminal color
        code
        :arg string datefmt: Datetime format.
        Used for formatting ``(asctime)`` placeholder in ``prefix_fmt``.
        .. versionchanged:: 3.2
        Added ``fmt`` and ``datefmt`` arguments.
        """
        if fmt is None:
            fmt = self.DEFAULT_FORMAT
        if datefmt is None:
            datefmt = self.DEFAULT_DATE_FORMAT
        # _logging.Formatter.__init__(self, datefmt=datefmt)
        super(StdOutFormatter, self).__init__(fmt=fmt, datefmt=datefmt, style=style)
        self._fmt = fmt
        # Level -> terminal color code; empty here, so no coloring is applied
        # unless a subclass/caller populates it.
        self._colors = {}
        # Escape sequence that resets the terminal color.
        self._normal = ''

    def format(self, record):
        try:
            message = record.getMessage()
            assert isinstance(message, str)  # guaranteed by logging
            # Encoding notes: the logging module only enforces that messages
            # are string-like. If a byte string makes it this far, convert it
            # to unicode (utf-8, falling back to repr()) so it survives the
            # prefix concatenation below instead of blowing up later with an
            # unhelpful decoding error.
            record.message = self.to_unicode(message)
        except Exception as e:
            # Never let a bad message kill logging; show the error + record.
            record.message = "Bad message (%r): %r" % (e, record.__dict__)
        record.asctime = self.formatTime(record, self.datefmt)
        # Fill the %(color)s / %(end_color)s placeholders used by _fmt.
        if record.levelno in self._colors:
            record.color = self._colors[record.levelno]
            record.end_color = self._normal
        else:
            record.color = record.end_color = ''
        formatted = self._fmt % record.__dict__
        if record.exc_info:
            if not record.exc_text:
                record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            # exc_text contains multiple lines. We need to to_unicode
            # each line separately so that non-utf8 bytes don't cause
            # all the newlines to turn into '\n'.
            lines = [formatted.rstrip()]
            lines.extend(self.to_unicode(ln) for ln in record.exc_text.split('\n'))
            formatted = '\n'.join(lines)
        # NOTE(review): this replace is a no-op as written. Tornado's original
        # indents continuation lines (replace("\n", "\n    ")); trailing
        # spaces may have been lost in transit — confirm against upstream.
        return formatted.replace("\n", "\n")

    @staticmethod
    def to_unicode(value):
        """
        Converts a string argument to a unicode string.
        If the argument is already a unicode string or None, it is returned
        unchanged. Otherwise it must be a byte string and is decoded as utf8.
        """
        try:
            if isinstance(value, (str, type(None))):
                return value
            if not isinstance(value, bytes):
                raise TypeError("Expected bytes, unicode, or None; got %r" % type(value))
            return value.decode("utf-8")
        except UnicodeDecodeError:
            # Undecodable bytes: fall back to their repr instead of raising.
            return repr(value)
)
@add_metaclass(SingletonMetaClass)
class _Logger(object):
    """Singleton wrapper around the stdlib logger for MODEL_NAME.

    Only MPI rank 0 creates the underlying logger; on every other rank
    `self._logger` stays None and all log methods are silent no-ops.
    """

    # Level 0
    NOTSET = _logging.NOTSET
    # Level 10
    DEBUG = _logging.DEBUG
    # Level 20
    INFO = _logging.INFO
    # Level 30
    WARNING = _logging.WARNING
    # Level 40
    ERROR = _logging.ERROR
    # Level 50
    CRITICAL = _logging.CRITICAL

    # Numeric level -> human-readable name.
    _level_names = {
        0: 'NOTSET',
        10: 'DEBUG',
        20: 'INFO',
        30: 'WARNING',
        40: 'ERROR',
        50: 'CRITICAL',
    }

    def __init__(self, capture_io=True):
        # NOTE(review): `capture_io` is accepted but unused in this method.
        self._logger = None
        self._logger_lock = threading.Lock()
        self._handlers = dict()
        # Saved `warnings.showwarning` so captureWarnings() can restore it.
        self.old_warnings_showwarning = None
        # Only the MPI rank-0 process emits logs.
        if MPI_rank_and_size()[0] == 0:
            self._define_logger()

    def _define_logger(self):
        # Use double-checked locking to avoid taking lock unnecessarily.
        if self._logger is not None:
            return self._logger
        with self._logger_lock:
            try:
                # Scope the TensorFlow logger to not conflict with users' loggers.
                self._logger = _logging.getLogger(MODEL_NAME)
                self.reset_stream_handler()
            finally:
                self.set_verbosity(verbosity_level=_Logger.INFO)
        # Stop records from bubbling up to the root logger (no duplicates).
        self._logger.propagate = False

    def reset_stream_handler(self):
        """(Re)install the stdout/stderr stream handlers on the logger.

        INFO and below go to stdout, everything above INFO to stderr.

        Raises:
            RuntimeError: if the logger has not been created yet.
        """
        if self._logger is None:
            raise RuntimeError("Impossible to set handlers if the Logger is not predefined")
        # ======== Remove Handler if already existing ========
        try:
            self._logger.removeHandler(self._handlers["stream_stdout"])
        except KeyError:
            pass
        try:
            self._logger.removeHandler(self._handlers["stream_stderr"])
        except KeyError:
            pass
        # ================= Streaming Handler =================
        # Add the output handler.
        self._handlers["stream_stdout"] = _logging.StreamHandler(sys.stdout)
        self._handlers["stream_stdout"].addFilter(lambda record: record.levelno <= _logging.INFO)
        self._handlers["stream_stderr"] = _logging.StreamHandler(sys.stderr)
        self._handlers["stream_stderr"].addFilter(lambda record: record.levelno > _logging.INFO)
        Formatter = StdOutFormatter
        self._handlers["stream_stdout"].setFormatter(Formatter())
        self._logger.addHandler(self._handlers["stream_stdout"])
        # NOTE(review): "stream_stderr" was just assigned above, so this
        # KeyError guard is effectively dead code. Kept byte-identical.
        try:
            self._handlers["stream_stderr"].setFormatter(Formatter())
            self._logger.addHandler(self._handlers["stream_stderr"])
        except KeyError:
            pass

    def get_verbosity(self):
        """Return how much logging output will be produced."""
        # Returns None on non-zero MPI ranks (no logger there).
        if self._logger is not None:
            return self._logger.getEffectiveLevel()

    def set_verbosity(self, verbosity_level):
        """Sets the threshold for what messages will be logged."""
        if self._logger is not None:
            self._logger.setLevel(verbosity_level)
            # Keep every attached handler in sync with the logger level.
            for handler in self._logger.handlers:
                handler.setLevel(verbosity_level)

    @contextmanager
    def temp_verbosity(self, verbosity_level):
        """Sets the a temporary threshold for what messages will be logged."""
        if self._logger is not None:
            old_verbosity = self.get_verbosity()
            try:
                self.set_verbosity(verbosity_level)
                yield
            finally:
                # Always restore the previous level, even on exception.
                self.set_verbosity(old_verbosity)
        else:
            # No logger on this rank: behave as a transparent no-op context.
            try:
                yield
            finally:
                pass

    def captureWarnings(self, capture):
        """
        If capture is true, redirect all warnings to the logging package.
        If capture is False, ensure that warnings are not redirected to logging
        but to their original destinations.
        """
        if self._logger is not None:
            if capture and self.old_warnings_showwarning is None:
                self.old_warnings_showwarning = warnings.showwarning  # Backup Method
                warnings.showwarning = self._showwarning
            elif not capture and self.old_warnings_showwarning is not None:
                warnings.showwarning = self.old_warnings_showwarning  # Restore Method
                self.old_warnings_showwarning = None

    def _showwarning(self, message, category, filename, lineno, file=None, line=None):
        """
        Implementation of showwarnings which redirects to logging.
        It will call warnings.formatwarning and will log the resulting string
        with level logging.WARNING.
        """
        s = warnings.formatwarning(message, category, filename, lineno, line)
        self.warning("%s", s)

    def debug(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'DEBUG'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.debug("Houston, we have a %s", "thorny problem", exc_info=1)
        """
        # Calls the private `_log` directly so the caller information
        # reported by the logging module is the user's frame, not this one.
        if self._logger is not None and self._logger.isEnabledFor(_Logger.DEBUG):
            self._logger._log(_Logger.DEBUG, msg, args, **kwargs)

    def info(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'INFO'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.info("Houston, we have a %s", "interesting problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.INFO):
            self._logger._log(_Logger.INFO, msg, args, **kwargs)

    def warning(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'WARNING'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.WARNING):
            self._logger._log(_Logger.WARNING, msg, args, **kwargs)

    def error(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'ERROR'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.error("Houston, we have a %s", "major problem", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.ERROR):
            self._logger._log(_Logger.ERROR, msg, args, **kwargs)

    def critical(self, msg, *args, **kwargs):
        """
        Log 'msg % args' with severity 'CRITICAL'.
        To pass exception information, use the keyword argument exc_info with
        a true value, e.g.
        logger.critical("Houston, we have a %s", "major disaster", exc_info=1)
        """
        if self._logger is not None and self._logger.isEnabledFor(_Logger.CRITICAL):
            self._logger._log(_Logger.CRITICAL, msg, args, **kwargs)
def log_cleaning(hide_deprecation_warnings=False):
    """Silence noisy TensorFlow logging and unify handler formatting.

    Args:
        hide_deprecation_warnings: if True, suppress all Python warnings and
            TensorFlow deprecation messages.
    """
    if hide_deprecation_warnings:
        warnings.simplefilter("ignore")
        # TF-internal imports, done lazily so TF is only touched on demand.
        from tensorflow.python.util import deprecation
        # NOTE(review): `deprecation_wrapper` was removed in later TF2
        # releases — this import may fail there; confirm the targeted
        # TensorFlow version.
        from tensorflow.python.util import deprecation_wrapper
        deprecation._PRINT_DEPRECATION_WARNINGS = False
        deprecation_wrapper._PER_MODULE_WARNING_LIMIT = 0
    formatter = _logging.Formatter('[%(levelname)s] %(message)s')
    from tensorflow.python.platform import tf_logging
    # Stop TF's logger and the root logger from double-printing records.
    tf_logging.get_logger().propagate = False
    _logging.getLogger().propagate = False
    # Apply the compact "[LEVEL] message" format to existing root handlers.
    for handler in _logging.getLogger().handlers:
        handler.setFormatter(formatter)
# Necessary to catch the correct caller
# NOTE(review): `_Logger.__class__` is the metaclass (SingletonMetaClass), so
# `_srcfile` ends up pointing at metaclasses.py rather than this file —
# was `inspect.getfile(_Logger)` intended? Left unchanged.
_logging._srcfile = os.path.normcase(inspect.getfile(_Logger.__class__))

# Module-level singleton exported as `logging` (see __all__).
logging = _Logger()
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/metaclasses.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = [
    "SingletonMetaClass",
]


class SingletonMetaClass(type):
    """Metaclass making every class that uses it a process-wide singleton.

    The first instantiation runs the class's normal construction path; every
    later call returns that same cached instance (later constructor arguments
    are ignored).
    """

    # Shared cache mapping each class to its unique instance.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # EAFP: the common case after the first call is a cache hit.
        try:
            return cls._instances[cls]
        except KeyError:
            instance = super(SingletonMetaClass, cls).__call__(*args, **kwargs)
            cls._instances[cls] = instance
            return instance
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/meters.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
six
import
collections
from
functools
import
lru_cache
import
numpy
as
np
__all__
=
[
"MetricMeter"
,
"StandardMeter"
,
"AverageMeter"
,
"MovingAverageMeter"
,
"MemoryLessMovingAverageMeter"
]
# Supported Numpy DTypes: `np.sctypes`
ACCEPTED_INT_NUMBER_FORMATS
=
(
int
,
np
.
uint8
,
np
.
uint16
,
np
.
uint32
,
np
.
uint64
,
np
.
int
,
np
.
int8
,
np
.
int16
,
np
.
int32
,
np
.
int64
,
)
ACCEPTED_FLOAT_NUMBER_FORMATS
=
(
float
,
np
.
float
,
np
.
float16
,
np
.
float32
,
np
.
float64
,
np
.
float128
,
)
ACCEPTED_STR_NUMBER_FORMATS
=
(
str
,
np
.
str
,
)
ACCEPTED_NUMBER_FORMATS
=
\
ACCEPTED_INT_NUMBER_FORMATS
+
\
ACCEPTED_FLOAT_NUMBER_FORMATS
+
\
ACCEPTED_STR_NUMBER_FORMATS
@six.add_metaclass(ABCMeta)
class AbstractMeterMixin(object):
    """Abstract base requiring subclasses to define `AUTHORIZED_DTYPES`.

    NOTE(review): `abstractmethod` is used here on something that concrete
    subclasses satisfy with a *class attribute* (a tuple), not a method —
    unusual but it does force subclasses to define the name.
    """

    @abstractmethod
    def AUTHORIZED_DTYPES(self):
        pass
@six.add_metaclass(ABCMeta)
class MetricMeter(AbstractMeterMixin):
    """Base class for metric accumulators.

    Recorded values are appended to a growing 1-D numpy array; concrete
    subclasses implement `read()` to reduce the history to a single number.
    """

    # Supported Numpy DTypes: `np.sctypes`
    AUTHORIZED_DTYPES = tuple(ACCEPTED_NUMBER_FORMATS)

    def __init__(self):
        # NOTE: the previous implementation decorated `__init__` and `__str__`
        # with `functools.lru_cache`. Caching a method keys the cache on
        # `self`, which pins every instance in memory for the lifetime of the
        # cache (ruff B019) while providing no benefit here — the decorators
        # have been removed; observable behavior is unchanged.
        self._values = np.array([])

    def reset(self):
        """Discard every recorded value."""
        self._values = np.array([])

    def __str__(self):
        return self.__class__.__name__

    def get_last(self):
        """Return the most recently recorded value.

        Raises:
            ValueError: if no value has been recorded yet.
        """
        try:
            return self._values[-1]
        except IndexError:
            raise ValueError("Impossible to get the last value. No value has been recorded yet")

    def record(self, val):
        """Validate and append `val` to the history.

        Raises:
            TypeError: if `val` is not one of `AUTHORIZED_DTYPES`.
            ValueError: if `val` is NaN or infinite.
        """
        if not isinstance(val, MetricMeter.AUTHORIZED_DTYPES):
            raise TypeError("Unsupported datatype received: %s" % str(type(val)))
        # NOTE(review): str values pass the isinstance check above but
        # `np.isnan` raises TypeError on them — confirm whether strings are
        # actually recorded anywhere.
        if np.isnan(val) or np.isinf(val):
            raise ValueError("invalid value received: %s" % str(val))
        self._values = np.append(self._values, val)

    @abstractmethod
    def read(self):
        """Reduce the recorded history to a single value (subclass hook)."""
        raise NotImplementedError()
class StandardMeter(MetricMeter):
    """Meter whose `read()` simply returns the last recorded value."""

    def read(self):
        # Delegates to the base class; raises ValueError if nothing recorded.
        return self.get_last()
class AverageMeter(MetricMeter):
    """Meter whose `read()` returns the mean of all recorded values."""

    def read(self):
        # Guard clause: an empty history has no mean.
        if not len(self._values):
            raise ValueError("NaN Result, Impossible to compute the average of an empty list")
        return np.mean(self._values)
class MovingAverageMeter(MetricMeter):
    """Meter whose `read()` averages only the last `window_size` values.

    The full history is still stored (see MemoryLessMovingAverageMeter for a
    bounded-memory variant); only the reduction is windowed.
    """

    def __init__(self, window_size):
        """
        Args:
            window_size (int): number of trailing values averaged by `read()`.

        Raises:
            ValueError: if `window_size` is not an int or is < 1.
        """
        super(MovingAverageMeter, self).__init__()
        if not isinstance(window_size, int):
            raise ValueError("`window_size` must be an integer")
        if window_size < 1:
            raise ValueError("`window_size` must be superior or equal to 1")
        self._window_size = window_size

    def __str__(self):
        # NOTE: dropped the previous `functools.lru_cache` decorator — caching
        # a method keys on `self` and keeps every instance alive for the
        # cache's lifetime (ruff B019), for no measurable gain here.
        return "%s(window_size=%d)" % (super(MovingAverageMeter, self).__str__(), self._window_size)

    def read(self):
        """Mean of the last `window_size` recorded values.

        Raises:
            ValueError: if no value has been recorded yet.
        """
        if len(self._values):
            return np.mean(self._values[-self._window_size:])
        else:
            raise ValueError("NaN Result, Impossible to compute the moving average of an empty list")
class MemoryLessMovingAverageMeter(MetricMeter):
    """Windowed-average meter that keeps only the last `window_size` values.

    Unlike MovingAverageMeter, old values are dropped eagerly (bounded
    memory) by storing them in a `collections.deque` with `maxlen`.
    """

    def __init__(self, window_size):
        """
        Args:
            window_size (int): capacity of the window; must be >= 1.

        Raises:
            ValueError: if `window_size` is not an int or is < 1.
        """
        super(MemoryLessMovingAverageMeter, self).__init__()
        # Validate *before* allocating the deque. Previously an invalid
        # `window_size` reached `collections.deque(maxlen=...)` first and
        # surfaced as a TypeError instead of the documented ValueError.
        if not isinstance(window_size, int):
            raise ValueError("`window_size` must be an integer")
        if window_size < 1:
            raise ValueError("`window_size` must be superior or equal to 1")
        self._window_size = window_size
        self._values = collections.deque(maxlen=window_size)

    def reset(self):
        """Discard every recorded value, keeping the window size."""
        self._values = collections.deque(maxlen=self._window_size)

    def __str__(self):
        # NOTE: dropped the previous `functools.lru_cache` decorator — caching
        # a method keys on `self` and pins instances in memory (ruff B019).
        return "%s(window_size=%d)" % (super(MemoryLessMovingAverageMeter, self).__str__(), self._window_size)

    def read(self):
        """Mean of the values currently held in the window.

        Raises:
            ValueError: if no value has been recorded yet.
        """
        if len(self._values):
            return np.mean(self._values)
        else:
            raise ValueError("NaN Result, Impossible to compute the moving average of an empty list")
\ No newline at end of file
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/utils/metric_tracking.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
weakref
from
mask_rcnn.utils.logging_backend
import
DistributedStrategy
from
mask_rcnn.utils.logging_backend
import
LoggingScope
from
mask_rcnn.utils.logging_formatter
import
logging
from
mask_rcnn.utils
import
meters
# Public API of this module: the global registries plus the two
# registration helpers defined below.
__all__ = [
    "TF_METRICS",
    "KERAS_MODELS",
    "KERAS_OPTIMIZERS",
    "register_metric",
    "clear_registered_metrics"
]
class WeakRefList(object):
    """List-like container that holds only weak references to its items.

    Dead references (items that have been garbage collected) are pruned
    transparently, so the container never keeps an object alive and never
    yields `None` during iteration.
    """

    def __init__(self):
        self._items = list()

    def _clean_iternal_list(self):
        # Keep only references whose target is still alive.
        live_refs = [ref for ref in self._items if ref() is not None]
        self._items = live_refs

    def __iter__(self):
        self._clean_iternal_list()
        for ref in self._items:
            target = ref()
            # The target may have died between the prune and this call.
            if target is None:
                continue
            yield target

    def __len__(self):
        self._clean_iternal_list()
        return len(self._items)

    def clear(self):
        """Drop every stored reference."""
        self._items.clear()

    def append(self, new_item):
        """Store a weak reference to `new_item` and prune dead entries."""
        self._items.append(weakref.ref(new_item))
        self._clean_iternal_list()
# Global registries consumed by the logging backend.
# TF_METRICS maps a metric name to a dict with keys
# {"tensor", "aggregator", "distributed_strategy", "scope"} — see
# `register_metric` below.
TF_METRICS = dict()
# Weak-reference lists so tracked models/optimizers can still be
# garbage collected.
KERAS_MODELS = WeakRefList()
KERAS_OPTIMIZERS = WeakRefList()
def register_metric(name, tensor, aggregator=None, metric_scope=LoggingScope.ITER, distributed_strategy=DistributedStrategy.NONE):
    """Register `tensor` under `name` in the global `TF_METRICS` registry.

    Args:
        name: unique metric name; registering a duplicate raises.
        tensor: the tensor whose value will be tracked.
        aggregator: instance of a `meters.AbstractMeterMixin` subclass used to
            reduce recorded values. Defaults to a fresh `meters.StandardMeter()`
            created per call.
        metric_scope: one of `LoggingScope.__values__()`.
        distributed_strategy: one of `DistributedStrategy.__values__()`.

    Raises:
        ValueError: on duplicate `name`, unknown aggregator type, or
            unrecognized `metric_scope` / `distributed_strategy`.
    """
    # BUGFIX: the default used to be `aggregator=meters.StandardMeter()`.
    # A default argument is evaluated once at import time, so every metric
    # registered without an explicit aggregator shared the SAME meter
    # instance and their recorded values were mixed together. Use a None
    # sentinel and build a fresh meter per call instead.
    if aggregator is None:
        aggregator = meters.StandardMeter()

    if name in TF_METRICS.keys():
        raise ValueError("A metric with the name `%s` has already been registered" % name)

    if not issubclass(aggregator.__class__, meters.AbstractMeterMixin):
        raise ValueError("Unknown `aggregator` received: %s" % aggregator.__class__.__name__)

    if metric_scope not in LoggingScope.__values__():
        raise ValueError(
            "Unknown `metric_scope` received: %s, authorized: %s" % (metric_scope, LoggingScope.__values__())
        )

    if distributed_strategy not in DistributedStrategy.__values__():
        raise ValueError(
            "Unknown `distributed_strategy` received: %s, authorized: %s" %
            (distributed_strategy, DistributedStrategy.__values__())
        )

    TF_METRICS[name] = {
        "tensor": tensor,
        "aggregator": aggregator,
        "distributed_strategy": distributed_strategy,
        "scope": metric_scope,
    }

    logging.debug(
        "New Metric Registered: `{metric_name}`, Aggregator: {aggregator}, "
        "Scope: {scope}, Distributed Strategy: {distributed_strategy}".format(
            metric_name=name,
            aggregator=str(aggregator),
            distributed_strategy=distributed_strategy,
            scope=metric_scope
        )
    )
def clear_registered_metrics():
    """Empty the global `TF_METRICS` registry (e.g. between runs or tests)."""
    TF_METRICS.clear()
    logging.debug("All registered metrics have been cleared")
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn_main.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Training script for Mask-RCNN."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
# Configure TF C++ logging via the environment. NOTE(review): these are set
# before `import tensorflow` below, which is presumably required for the
# native runtime to pick them up — confirm if reordering imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # or any {'0', '1', '2'}
os.environ["TF_CPP_VMODULE"] = 'non_max_suppression_op=0,generate_box_proposals_op=0,executor=0'
# os.environ["TF_XLA_FLAGS"] = 'tf_xla_print_cluster_outputs=1'
from
absl
import
app
import
tensorflow
as
tf
from
tensorflow.python.framework.ops
import
disable_eager_execution
from
mask_rcnn.utils.logging_formatter
import
logging
from
mask_rcnn.utils.distributed_utils
import
MPI_is_distributed
from
mask_rcnn
import
dataloader
from
mask_rcnn
import
distributed_executer
from
mask_rcnn
import
mask_rcnn_model
from
mask_rcnn.hyperparameters
import
mask_rcnn_params
from
mask_rcnn.hyperparameters
import
params_io
from
mask_rcnn.hyperparameters.cmdline_utils
import
define_hparams_flags
from
mask_rcnn.utils.logging_formatter
import
log_cleaning
import
dllogger
# Define all command-line hyperparameter flags at module import time; the
# values are parsed when `app.run(main)` executes.
FLAGS = define_hparams_flags()
def run_executer(runtime_config, train_input_fn=None, eval_input_fn=None):
    """Runs Mask RCNN model on distribution strategy defined by the user."""
    # Pick the executer implementation, then instantiate it once.
    if runtime_config.use_tf_distributed:
        executer_cls = distributed_executer.TFDistributedExecuter
    else:
        executer_cls = distributed_executer.EstimatorExecuter

    executer = executer_cls(runtime_config, mask_rcnn_model.mask_rcnn_model_fn)

    mode = runtime_config.mode
    if mode == 'train':
        executer.train(
            train_input_fn=train_input_fn,
            run_eval_after_train=FLAGS.eval_after_training,
            eval_input_fn=eval_input_fn
        )
    elif mode == 'eval':
        executer.eval(eval_input_fn=eval_input_fn)
    elif mode == 'train_and_eval':
        executer.train_and_eval(train_input_fn=train_input_fn, eval_input_fn=eval_input_fn)
    else:
        raise ValueError('Mode must be one of `train`, `eval`, or `train_and_eval`')
def main(argv):
    """Entry point: build the run config, validate it, and launch the executer."""
    del argv  # Unused.

    # ============================ Configure parameters ============================ #
    RUN_CONFIG = mask_rcnn_params.default_config()

    temp_config = FLAGS.flag_values_dict()
    # Normalize CLI flag values (strings) into numeric types.
    temp_config['learning_rate_decay_levels'] = [
        float(decay) for decay in temp_config['learning_rate_decay_levels']
    ]
    temp_config['learning_rate_levels'] = [
        decay * temp_config['init_learning_rate'] for decay in temp_config['learning_rate_decay_levels']
    ]
    temp_config['learning_rate_steps'] = [int(step) for step in temp_config['learning_rate_steps']]

    RUN_CONFIG = params_io.override_hparams(RUN_CONFIG, temp_config)
    # ============================ Configure parameters ============================ #

    if RUN_CONFIG.use_tf_distributed and MPI_is_distributed():
        raise RuntimeError("Incompatible Runtime. Impossible to use `--use_tf_distributed` with MPIRun Horovod")

    # BUGFIX: this used to test `RUN_CONFIG.eval_samples`, which does not match
    # the error message and let training start without any training data while
    # rejecting valid configs with eval_samples == 0.
    if RUN_CONFIG.mode in ('train', 'train_and_eval') and not RUN_CONFIG.training_file_pattern:
        raise RuntimeError('You must specify `training_file_pattern` for training.')

    if RUN_CONFIG.mode in ('eval', 'train_and_eval'):
        if not RUN_CONFIG.validation_file_pattern:
            raise RuntimeError('You must specify `validation_file_pattern` for evaluation.')

        if RUN_CONFIG.val_json_file == "" and not RUN_CONFIG.include_groundtruth_in_features:
            raise RuntimeError(
                'You must specify `val_json_file` or include_groundtruth_in_features=True for evaluation.'
            )

        if not RUN_CONFIG.include_groundtruth_in_features and not os.path.isfile(RUN_CONFIG.val_json_file):
            raise FileNotFoundError("Validation JSON File not found: %s" % RUN_CONFIG.val_json_file)

    dllogger.init(
        backends=[dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE, filename=RUN_CONFIG.log_path)]
    )

    if RUN_CONFIG.mode in ('train', 'train_and_eval'):
        train_input_fn = dataloader.InputReader(
            file_pattern=RUN_CONFIG.training_file_pattern,
            mode=tf.estimator.ModeKeys.TRAIN,
            num_examples=None,
            use_fake_data=RUN_CONFIG.use_fake_data,
            use_instance_mask=RUN_CONFIG.include_mask,
            seed=RUN_CONFIG.seed
        )
    else:
        train_input_fn = None

    # BUGFIX: the original condition was
    #   mode in ('eval', 'train_and_eval' or (mode == 'train' and eval_after_training))
    # The misplaced parenthesis made `'train_and_eval' or (...)` evaluate to the
    # truthy string 'train_and_eval', so the `train` + `--eval_after_training`
    # case never built an eval input function.
    if (RUN_CONFIG.mode in ('eval', 'train_and_eval') or
            (RUN_CONFIG.mode == 'train' and RUN_CONFIG.eval_after_training)):
        eval_input_fn = dataloader.InputReader(
            file_pattern=RUN_CONFIG.validation_file_pattern,
            mode=tf.estimator.ModeKeys.PREDICT,
            num_examples=RUN_CONFIG.eval_samples,
            use_fake_data=False,
            use_instance_mask=RUN_CONFIG.include_mask,
            seed=RUN_CONFIG.seed
        )
    else:
        eval_input_fn = None

    run_executer(RUN_CONFIG, train_input_fn, eval_input_fn)
if __name__ == '__main__':
    logging.set_verbosity(logging.INFO)
    # This script drives tf.estimator, which requires graph mode.
    disable_eager_execution()
    # NOTE(review): verbosity is set twice (INFO above, DEBUG here); the
    # DEBUG setting wins — the INFO call looks redundant, confirm intent.
    logging.set_verbosity(logging.DEBUG)
    tf.autograph.set_verbosity(0)
    log_cleaning(hide_deprecation_warnings=True)
    app.run(main)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/benchmark_inference.py
0 → 100644
View file @
c320b6ef
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running evaluation benchmark """
import
argparse
import
os
import
shutil
import
subprocess
def main():
    """Build and launch an evaluation-benchmark run of mask_rcnn_main.py."""
    # CLI flags
    parser = argparse.ArgumentParser(description="MaskRCNN evaluation benchmark")
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--amp', action='store_true')
    parser.add_argument('--data_dir', type=str, default='/data')
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    parser.add_argument('--weights_dir', type=str, default='/model')
    flags = parser.parse_args()
    main_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../mask_rcnn_main.py'))

    # build command
    cmd = (
        f'python {main_path}'
        f' --mode eval'
        f' --model_dir "{flags.model_dir}"'
        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
        f' --validation_file_pattern "{os.path.join(flags.data_dir, "val*.tfrecord")}"'
        f' --val_json_file "{os.path.join(flags.data_dir, "annotations/instances_val2017.json")}"'
        f' --num_steps_per_eval 200'
        f' --eval_samples 1200'
        f' --use_batched_nms'
        f' --nouse_custom_box_proposals_op'
        f' --xla'
        f' --eval_batch_size {flags.batch_size}'
        f' {"--amp" if flags.amp else ""}'
    )

    # print command
    line = '-' * shutil.get_terminal_size()[0]
    print(line, cmd, line, sep='\n')

    # run model
    # BUGFIX: the child's exit status was discarded, so this wrapper reported
    # success even when the benchmark failed. Propagate it to our own exit code.
    # NOTE(review): `shell=True` with interpolated paths is kept for backward
    # compatibility but is unsafe if the directory flags come from untrusted
    # input — consider an argument list with shell=False.
    raise SystemExit(subprocess.call(cmd, shell=True))
# Standard script entry point guard.
if __name__ == '__main__':
    main()
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/benchmark_training.py
0 → 100644
View file @
c320b6ef
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Scripts that simplifies running training benchmark """
import
argparse
import
os
import
shutil
import
subprocess
def main():
    """Build and launch a training-benchmark run of mask_rcnn_main.py via horovodrun."""
    # CLI flags
    parser = argparse.ArgumentParser(description="MaskRCNN train benchmark")
    parser.add_argument('--gpus', type=int, required=True)
    parser.add_argument('--batch_size', type=int, required=True)
    parser.add_argument('--amp', action='store_true')
    parser.add_argument('--data_dir', type=str, default='/data')
    parser.add_argument('--model_dir', type=str, default='/tmp/model')
    parser.add_argument('--weights_dir', type=str, default='/model')
    flags = parser.parse_args()
    main_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../mask_rcnn_main.py'))

    # build command
    cmd = (
        f'horovodrun -np {flags.gpus} '
        f'python {main_path}'
        f' --mode train'
        f' --model_dir "{flags.model_dir}"'
        f' --checkpoint "{os.path.join(flags.weights_dir, "resnet/resnet-nhwc-2018-02-07/model.ckpt-112603")}"'
        f' --training_file_pattern "{os.path.join(flags.data_dir, "train*.tfrecord")}"'
        f' --init_learning_rate 0.04'
        f' --total_steps 200'
        f' --use_batched_nms'
        f' --noeval_after_training'
        f' --nouse_custom_box_proposals_op'
        f' --xla'
        f' --train_batch_size {flags.batch_size}'
        f' {"--amp" if flags.amp else ""}'
    )

    # print command
    line = '-' * shutil.get_terminal_size()[0]
    print(line, cmd, line, sep='\n')

    # run model
    # BUGFIX: the child's exit status was discarded, so this wrapper reported
    # success even when training failed. Propagate it to our own exit code.
    # NOTE(review): `shell=True` with interpolated paths is kept for backward
    # compatibility but is unsafe if the directory flags come from untrusted
    # input — consider an argument list with shell=False.
    raise SystemExit(subprocess.call(cmd, shell=True))
# Standard script entry point guard.
if __name__ == '__main__':
    main()
TensorFlow2x/ComputeVision/Detection/MaskRCNN/scripts/docker/build_tf1.sh
0 → 100644
View file @
c320b6ef
#!/bin/bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Coordinates of the NVIDIA TensorFlow 1.x base image on NGC.
CONTAINER_TF1x_BASE="nvcr.io/nvidia/tensorflow"
CONTAINER_TF1x_TAG="20.06-tf1-py3"

# ======================== Refresh base image ======================== #
docker pull "${CONTAINER_TF1x_BASE}:${CONTAINER_TF1x_TAG}"

# ========================== Build container ========================= #
echo -e "\n\nBuilding NVIDIA TF 1.x Container\n\n"
sleep 1

# Build the MaskRCNN TF1 image from the current directory's Dockerfile,
# passing the base image coordinates as build args.
# NOTE(review): FROM_IMAGE_NAME duplicates BASE_CONTAINER:IMG_TAG as a
# hard-coded string — confirm which arg the Dockerfile actually consumes.
docker build -t joc_tensorflow_maskrcnn:tf1.x-py3 \
    --build-arg BASE_CONTAINER="${CONTAINER_TF1x_BASE}" \
    --build-arg IMG_TAG="${CONTAINER_TF1x_TAG}" \
    --build-arg FROM_IMAGE_NAME="nvcr.io/nvidia/tensorflow:20.06-tf1-py3" \
    .
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment