Commit e2385734 authored by Kaushik Shivakumar

Merge branch 'master' of https://github.com/tensorflow/models into latest

parents 30c14aa9 1bfb577d
# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 39.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
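
As a quick sanity check, a pipeline file like the one above can be parsed and inspected with the Object Detection API's config utilities before launching training. A minimal sketch, assuming the TF2 OD API is installed; the filename is a placeholder:

```python
# Sketch: parse a pipeline config and inspect a few of the fields set above.
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(
    'ssd_resnet152_v1_fpn_1024x1024.config')  # placeholder path

model_config = configs['model']
train_config = configs['train_config']
print(model_config.ssd.num_classes)                               # 90
print(model_config.ssd.image_resizer.fixed_shape_resizer.height)  # 1024
print(train_config.batch_size)                                    # 64
```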
# SSD with ResNet-152 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 35.6 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet152_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet152.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
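
All four configs use the sigmoid focal classification loss from the RetinaNet paper cited in the headers, with alpha = 0.25 and gamma = 2.0. As a reference for what `weighted_sigmoid_focal` computes per anchor, here is the bare formula from Lin et al. in plain NumPy (a sketch, not the API's implementation):

```python
import numpy as np

def sigmoid_focal_loss(logit, is_positive, alpha=0.25, gamma=2.0):
  """Focal loss FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t)."""
  p = 1.0 / (1.0 + np.exp(-logit))                # sigmoid score
  p_t = p if is_positive else 1.0 - p             # prob. of the true class
  alpha_t = alpha if is_positive else 1.0 - alpha
  return -alpha_t * (1.0 - p_t) ** gamma * np.log(p_t)

# class_prediction_bias_init: -4.6 starts logits near sigmoid(-4.6) ~= 0.01,
# so the many easy negatives contribute almost nothing to the initial loss:
print(sigmoid_focal_loss(-4.6, is_positive=False))  # ~7e-7, vanishingly small
```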
# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 38.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 1024
width: 1024
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 100000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 100000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
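
The `multiscale_anchor_generator` settings shared by these configs place 3 aspect ratios × 2 scales per octave = 6 anchors at every location of feature levels 3 through 7 (strides 8 through 128). A quick sketch of the anchor-count arithmetic for the two input resolutions used here (both sizes divide all strides evenly, so integer division matches the API's grids):

```python
# Sketch: anchors implied by min_level=3, max_level=7, aspect_ratios of
# [1.0, 2.0, 0.5], and scales_per_octave=2.
def count_anchors(image_size, min_level=3, max_level=7,
                  num_aspect_ratios=3, scales_per_octave=2):
  per_location = num_aspect_ratios * scales_per_octave   # 6
  total = 0
  for level in range(min_level, max_level + 1):
    grid = image_size // 2 ** level   # feature-map side length at this level
    total += grid * grid * per_location
  return total

print(count_anchors(1024))  # 130944 anchors for the 1024x1024 configs
print(count_anchors(640))   # 51150 anchors for the 640x640 configs
```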
# SSD with ResNet-50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a. RetinaNet).
# See Lin et al., https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from an ImageNet classification checkpoint
# Train on TPU-8
#
# Achieves 34.3 mAP on COCO17 Val
model {
ssd {
inplace_batchnorm_update: true
freeze_batchnorm: false
num_classes: 90
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
use_matmul_gather: true
}
}
similarity_calculator {
iou_similarity {
}
}
encode_background_as_zeros: true
anchor_generator {
multiscale_anchor_generator {
min_level: 3
max_level: 7
anchor_scale: 4.0
aspect_ratios: [1.0, 2.0, 0.5]
scales_per_octave: 2
}
}
image_resizer {
fixed_shape_resizer {
height: 640
width: 640
}
}
box_predictor {
weight_shared_convolutional_box_predictor {
depth: 256
class_prediction_bias_init: -4.6
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
random_normal_initializer {
stddev: 0.01
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
num_layers_before_predictor: 4
kernel_size: 3
}
}
feature_extractor {
type: 'ssd_resnet50_v1_fpn_keras'
fpn {
min_level: 3
max_level: 7
}
min_depth: 16
depth_multiplier: 1.0
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.0004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
scale: true,
decay: 0.997,
epsilon: 0.001,
}
}
override_base_feature_extractor_hyperparams: true
}
loss {
classification_loss {
weighted_sigmoid_focal {
alpha: 0.25
gamma: 2.0
}
}
localization_loss {
weighted_smooth_l1 {
}
}
classification_weight: 1.0
localization_weight: 1.0
}
normalize_loss_by_num_matches: true
normalize_loc_loss_by_codesize: true
post_processing {
batch_non_max_suppression {
score_threshold: 1e-8
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
fine_tune_checkpoint_version: V2
fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED/resnet50.ckpt-1"
fine_tune_checkpoint_type: "classification"
batch_size: 64
sync_replicas: true
startup_delay_steps: 0
replicas_to_aggregate: 8
use_bfloat16: true
num_steps: 25000
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
random_crop_image {
min_object_covered: 0.0
min_aspect_ratio: 0.75
max_aspect_ratio: 3.0
min_area: 0.75
max_area: 1.0
overlap_thresh: 0.0
}
}
optimizer {
momentum_optimizer: {
learning_rate: {
cosine_decay_learning_rate {
learning_rate_base: .04
total_steps: 25000
warmup_learning_rate: .013333
warmup_steps: 2000
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
max_number_of_boxes: 100
unpad_groundtruth_tensors: false
}
train_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/train2017-?????-of-00256.tfrecord"
}
}
eval_config: {
metrics_set: "coco_detection_metrics"
use_moving_averages: false
}
eval_input_reader: {
label_map_path: "PATH_TO_BE_CONFIGURED/label_map.txt"
shuffle: false
num_epochs: 1
tf_record_input_reader {
input_path: "PATH_TO_BE_CONFIGURED/val2017-?????-of-00032.tfrecord"
}
}
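
Each `train_config` above pairs momentum SGD (momentum 0.9) with a linear-warmup cosine schedule; the 1024×1024 variants train for 100000 steps and the 640×640 variants for 25000. A sketch of the learning-rate curve those `cosine_decay_learning_rate` fields describe (an approximation of the API's schedule, with defaults taken from the 640×640 configs; exact boundary handling may differ):

```python
import math

def lr_at_step(step, learning_rate_base=0.04, total_steps=25000,
               warmup_learning_rate=0.013333, warmup_steps=2000):
  """Approximates cosine_decay_learning_rate from the configs above."""
  if step < warmup_steps:
    # Linear ramp from warmup_learning_rate up to learning_rate_base.
    slope = (learning_rate_base - warmup_learning_rate) / warmup_steps
    return warmup_learning_rate + slope * step
  # Cosine decay from learning_rate_base down to 0 over the remaining steps.
  progress = (step - warmup_steps) / (total_steps - warmup_steps)
  return 0.5 * learning_rate_base * (1.0 + math.cos(math.pi * progress))

print(lr_at_step(0))       # 0.013333 (start of warmup)
print(lr_at_step(2000))    # 0.04     (peak, end of warmup)
print(lr_at_step(25000))   # ~0.0     (fully decayed)
```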
......@@ -42,9 +42,6 @@ PART_NAMES = [
b'left_face',
]
_SRC_PATH = ('google3/third_party/tensorflow_models/object_detection/'
'dataset_tools/densepose')
def scale(dp_surface_coords, y_scale, x_scale, scope=None):
"""Scales DensePose coordinates in y and x dimensions.
......@@ -266,10 +263,14 @@ class DensePoseHorizontalFlip(object):
def __init__(self):
"""Constructor."""
uv_symmetry_transforms_path = os.path.join(
tf.resource_loader.get_data_files_path(), '..', 'dataset_tools',
'densepose', 'UV_symmetry_transforms.mat')
data = scipy.io.loadmat(uv_symmetry_transforms_path)
path = os.path.dirname(os.path.abspath(__file__))
uv_symmetry_transforms_path = tf.resource_loader.get_path_to_datafile(
os.path.join(path, '..', 'dataset_tools', 'densepose',
'UV_symmetry_transforms.mat'))
tf.logging.info('Loading DensePose symmetry transforms file from {}'.format(
uv_symmetry_transforms_path))
with tf.io.gfile.GFile(uv_symmetry_transforms_path, 'rb') as f:
data = scipy.io.loadmat(f)
# Create lookup maps which indicate how a VU coordinate changes after a
# horizontal flip.
......
......@@ -102,7 +102,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
keypoint_visibilities, densepose_*}
fields.InputDataFields.is_annotated.
Returns:
......@@ -123,7 +123,7 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
Args:
field: a string key, options are
fields.BoxListFields.{boxes,classes,masks,keypoints,
keypoint_visibilities} or
keypoint_visibilities, densepose_*} or
fields.InputDataFields.is_annotated.
Returns:
......@@ -251,9 +251,14 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
detection_masks: [batch, max_detections, mask_height, mask_width]
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
detection_keypoints: [batch, max_detections, num_keypoints, 2]
(optional)
detection_keypoint_scores: [batch, max_detections, num_keypoints]
(optional)
detection_surface_coords: [batch, max_detections, mask_height,
mask_width, 2] (optional)
num_detections: [batch]
In addition to the above fields this stage also outputs the following
......@@ -288,19 +293,23 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
"""
pass
def provide_groundtruth(self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
def provide_groundtruth(
self,
groundtruth_boxes_list,
groundtruth_classes_list,
groundtruth_masks_list=None,
groundtruth_keypoints_list=None,
groundtruth_keypoint_visibilities_list=None,
groundtruth_dp_num_points_list=None,
groundtruth_dp_part_ids_list=None,
groundtruth_dp_surface_coords_list=None,
groundtruth_weights_list=None,
groundtruth_confidences_list=None,
groundtruth_is_crowd_list=None,
groundtruth_group_of_list=None,
groundtruth_area_list=None,
is_annotated_list=None,
groundtruth_labeled_classes=None):
"""Provide groundtruth tensors.
Args:
......@@ -324,6 +333,15 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
`groundtruth_keypoint_visibilities_list`).
groundtruth_keypoint_visibilities_list: a list of 3-D tf.bool tensors
of shape [num_boxes, num_keypoints] containing keypoint visibilities.
groundtruth_dp_num_points_list: a list of 1-D tf.int32 tensors of shape
[num_boxes] containing the number of DensePose sampled points.
groundtruth_dp_part_ids_list: a list of 2-D tf.int32 tensors of shape
[num_boxes, max_sampled_points] containing the DensePose part ids
(0-indexed) for each sampled point. Note that there may be padding.
groundtruth_dp_surface_coords_list: a list of 3-D tf.float32 tensors of
shape [num_boxes, max_sampled_points, 4] containing the DensePose
surface coordinates for each sampled point. Note that there may be
padding.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
......@@ -361,6 +379,18 @@ class DetectionModel(six.with_metaclass(abc.ABCMeta, _BaseClass)):
self._groundtruth_lists[
fields.BoxListFields.keypoint_visibilities] = (
groundtruth_keypoint_visibilities_list)
if groundtruth_dp_num_points_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_num_points] = (
groundtruth_dp_num_points_list)
if groundtruth_dp_part_ids_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_part_ids] = (
groundtruth_dp_part_ids_list)
if groundtruth_dp_surface_coords_list:
self._groundtruth_lists[
fields.BoxListFields.densepose_surface_coords] = (
groundtruth_dp_surface_coords_list)
if groundtruth_is_crowd_list:
self._groundtruth_lists[
fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
......
......@@ -3984,7 +3984,7 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width,channels].
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
......@@ -4128,6 +4128,131 @@ def random_square_crop_by_scale(image, boxes, labels, label_weights,
return return_values
def random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=None,
keypoints=None,
scale_min=0.1,
scale_max=2.0,
output_size=512,
resize_method=tf.image.ResizeMethod.BILINEAR,
seed=None):
"""Randomly scale, crop, and then pad an image to fixed square dimensions.
Randomly scale, crop, and then pad an image to the desired square output
dimensions. Specifically, this method first samples a random_scale factor
from a uniform distribution between scale_min and scale_max, and then resizes
the image such that its maximum dimension is (output_size * random_scale).
Secondly, a square output_size crop is extracted from the resized image
(note, this will only occur when random_scale > 1.0). Lastly, the cropped
region is padded to the desired square output_size, by filling with zeros.
The augmentation is borrowed from [1]
[1]: https://arxiv.org/abs/1911.09070
Args:
image: rank 3 float32 tensor containing 1 image ->
[height, width, channels].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4]. Boxes
are in normalized form meaning their coordinates vary between [0, 1]. Each
row is in the form of [ymin, xmin, ymax, xmax]. Boxes on the crop boundary
are clipped to the boundary and boxes falling outside the crop are
ignored.
labels: rank 1 int32 tensor containing the object classes.
label_weights: float32 tensor of shape [num_instances] representing the
weight for each box.
masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
width] containing instance masks. The masks are of the same height, width
as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape [num_instances,
num_keypoints, 2]. The keypoints are in y-x normalized coordinates.
scale_min: float, the minimum value for the random scale factor.
scale_max: float, the maximum value for the random scale factor.
output_size: int, the desired (square) output image size.
resize_method: tf.image.ResizeMethod, resize method to use when scaling the
input images.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
label_weights: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks (only returned when input masks are provided).
keypoints: rank 3 float32 tensor with shape [num_instances, num_keypoints, 2]
in normalized coordinates (only returned when input keypoints are provided).
"""
img_shape = tf.shape(image)
input_height, input_width = img_shape[0], img_shape[1]
random_scale = tf.random_uniform([], scale_min, scale_max, seed=seed)
# Compute the scaled height and width from the random scale.
max_input_dim = tf.cast(tf.maximum(input_height, input_width), tf.float32)
input_ar_y = tf.cast(input_height, tf.float32) / max_input_dim
input_ar_x = tf.cast(input_width, tf.float32) / max_input_dim
scaled_height = tf.cast(random_scale * output_size * input_ar_y, tf.int32)
scaled_width = tf.cast(random_scale * output_size * input_ar_x, tf.int32)
# Compute the offsets:
offset_y = tf.cast(scaled_height - output_size, tf.float32)
offset_x = tf.cast(scaled_width - output_size, tf.float32)
offset_y = tf.maximum(0.0, offset_y) * tf.random_uniform([], 0, 1, seed=seed)
offset_x = tf.maximum(0.0, offset_x) * tf.random_uniform([], 0, 1, seed=seed)
offset_y = tf.cast(offset_y, tf.int32)
offset_x = tf.cast(offset_x, tf.int32)
# Scale, crop, and pad the input image.
scaled_image = tf.image.resize_images(
image, [scaled_height, scaled_width], method=resize_method)
scaled_image = scaled_image[offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0, output_size,
output_size)
# Update the boxes.
new_window = tf.cast(
tf.stack([offset_y, offset_x,
offset_y + output_size, offset_x + output_size]),
dtype=tf.float32)
new_window /= tf.cast(
tf.stack([scaled_height, scaled_width, scaled_height, scaled_width]),
dtype=tf.float32)
boxlist = box_list.BoxList(boxes)
boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
boxlist, indices = box_list_ops.prune_completely_outside_window(
boxlist, [0.0, 0.0, 1.0, 1.0])
boxlist = box_list_ops.clip_to_window(
boxlist, [0.0, 0.0, 1.0, 1.0], filter_nonoverlapping=False)
return_values = [output_image, boxlist.get(),
tf.gather(labels, indices),
tf.gather(label_weights, indices)]
if masks is not None:
new_masks = tf.expand_dims(masks, -1)
new_masks = tf.image.resize_images(
new_masks, [scaled_height, scaled_width], method=resize_method)
new_masks = new_masks[:, offset_y:offset_y + output_size,
offset_x:offset_x + output_size, :]
new_masks = tf.image.pad_to_bounding_box(
new_masks, 0, 0, output_size, output_size)
new_masks = tf.squeeze(new_masks, [-1])
return_values.append(tf.gather(new_masks, indices))
if keypoints is not None:
keypoints = tf.gather(keypoints, indices)
keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
keypoints = keypoint_ops.prune_outside_window(
keypoints, [0.0, 0.0, 1.0, 1.0])
return_values.append(keypoints)
return return_values
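# Illustrative usage sketch (hypothetical shapes and values; assumes the
# TF1-style graph mode used elsewhere in this file):
#
#   image = tf.random_uniform([512, 256, 3])
#   boxes = tf.constant([[0.2, 0.2, 0.4, 0.4]])
#   labels = tf.constant([1], dtype=tf.int32)
#   weights = tf.constant([1.0])
#   (new_image, new_boxes, new_labels,
#    new_weights) = random_scale_crop_and_pad_to_square(
#        image, boxes, labels, weights,
#        scale_min=0.5, scale_max=1.5, output_size=512)
#   # new_image is 512x512x3; boxes falling outside the sampled crop are
#   # pruned, and the survivors are re-normalized to the output frame.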
def get_default_func_arg_map(include_label_weights=True,
include_label_confidences=False,
include_multiclass_scores=False,
......@@ -4230,15 +4355,14 @@ def get_default_func_arg_map(include_label_weights=True,
random_adjust_saturation: (fields.InputDataFields.image,),
random_distort_color: (fields.InputDataFields.image,),
random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
random_crop_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights,
groundtruth_label_confidences, multiclass_scores,
groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities,
groundtruth_dp_num_points, groundtruth_dp_part_ids,
groundtruth_dp_surface_coords),
random_crop_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_label_confidences,
multiclass_scores, groundtruth_instance_masks, groundtruth_keypoints,
groundtruth_keypoint_visibilities, groundtruth_dp_num_points,
groundtruth_dp_part_ids, groundtruth_dp_surface_coords),
random_pad_image:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, groundtruth_instance_masks,
......@@ -4361,6 +4485,12 @@ def get_default_func_arg_map(include_label_weights=True,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
random_scale_crop_and_pad_to_square:
(fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes,
groundtruth_label_weights, groundtruth_instance_masks,
groundtruth_keypoints),
}
return prep_func_arg_map
......
......@@ -712,76 +712,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
test_masks=True,
test_keypoints=True)
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRunRandomHorizontalFlipWithMaskAndKeypoints(self, include_dense_pose):
def graph_fn():
preprocess_options = [(preprocessor.random_horizontal_flip, {})]
image_height = 3
image_width = 3
images = tf.random_uniform([1, image_height, image_width, 3])
boxes = self.createTestBoxes()
masks = self.createTestMasks()
keypoints, keypoint_visibilities = self.createTestKeypoints()
dp_num_point, dp_part_ids, dp_surface_coords = self.createTestDensePose()
keypoint_flip_permutation = self.createKeypointFlipPermutation()
tensor_dict = {
fields.InputDataFields.image:
images,
fields.InputDataFields.groundtruth_boxes:
boxes,
fields.InputDataFields.groundtruth_instance_masks:
masks,
fields.InputDataFields.groundtruth_keypoints:
keypoints,
fields.InputDataFields.groundtruth_keypoint_visibilities:
keypoint_visibilities
}
if include_dense_pose:
tensor_dict.update({
fields.InputDataFields.groundtruth_dp_num_points: dp_num_point,
fields.InputDataFields.groundtruth_dp_part_ids: dp_part_ids,
fields.InputDataFields.groundtruth_dp_surface_coords:
dp_surface_coords
})
preprocess_options = [(preprocessor.random_horizontal_flip, {
'keypoint_flip_permutation': keypoint_flip_permutation
})]
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_instance_masks=True,
include_keypoints=True,
include_keypoint_visibilities=True,
include_dense_pose=include_dense_pose)
tensor_dict = preprocessor.preprocess(
tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
keypoint_visibilities = tensor_dict[
fields.InputDataFields.groundtruth_keypoint_visibilities]
output_tensors = [boxes, masks, keypoints, keypoint_visibilities]
if include_dense_pose:
dp_num_points = tensor_dict[
fields.InputDataFields.groundtruth_dp_num_points]
dp_part_ids = tensor_dict[
fields.InputDataFields.groundtruth_dp_part_ids]
dp_surface_coords = tensor_dict[
fields.InputDataFields.groundtruth_dp_surface_coords]
output_tensors.extend([dp_num_points, dp_part_ids, dp_surface_coords])
return output_tensors
output_tensors = self.execute_cpu(graph_fn, [])
self.assertIsNotNone(output_tensors[0]) # Boxes.
self.assertIsNotNone(output_tensors[1]) # Masks.
self.assertIsNotNone(output_tensors[2]) # Keypoints
self.assertIsNotNone(output_tensors[3]) # Keypoint Visibilities.
if include_dense_pose:
self.assertIsNotNone(output_tensors[4]) # DensePose Num Points.
self.assertIsNotNone(output_tensors[5]) # DensePose Part IDs.
self.assertIsNotNone(output_tensors[6]) # DensePose Surface Coords
def testRandomVerticalFlip(self):
......@@ -2380,7 +2310,6 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
@parameterized.parameters(
{'include_dense_pose': False},
{'include_dense_pose': True}
)
def testRandomPadImageWithKeypointsAndMasks(self, include_dense_pose):
def graph_fn():
......@@ -3912,6 +3841,90 @@ class PreprocessorTest(test_case.TestCase, parameterized.TestCase):
size = max(image.shape)
self.assertAlmostEqual(scale * 256.0, size)
self.assertAllClose(image[:, :, 0], masks[0, :, :])
@parameterized.named_parameters(('scale_0_1', 0.1), ('scale_1_0', 1.0),
('scale_2_0', 2.0))
def test_random_scale_crop_and_pad_to_square(self, scale):
def graph_fn():
image = np.random.randn(512, 256, 1)
box_centers = [0.25, 0.5, 0.75]
box_size = 0.1
box_corners = []
box_labels = []
box_label_weights = []
keypoints = []
masks = []
for center_y in box_centers:
for center_x in box_centers:
box_corners.append(
[center_y - box_size / 2.0, center_x - box_size / 2.0,
center_y + box_size / 2.0, center_x + box_size / 2.0])
box_labels.append([1])
box_label_weights.append([1.])
keypoints.append(
[[center_y - box_size / 2.0, center_x - box_size / 2.0],
[center_y + box_size / 2.0, center_x + box_size / 2.0]])
masks.append(image[:, :, 0].reshape(512, 256))
image = tf.constant(image)
boxes = tf.constant(box_corners)
labels = tf.constant(box_labels)
label_weights = tf.constant(box_label_weights)
keypoints = tf.constant(keypoints)
masks = tf.constant(np.stack(masks))
(new_image, new_boxes, _, _, new_masks,
new_keypoints) = preprocessor.random_scale_crop_and_pad_to_square(
image,
boxes,
labels,
label_weights,
masks=masks,
keypoints=keypoints,
scale_min=scale,
scale_max=scale,
output_size=512)
return new_image, new_boxes, new_masks, new_keypoints
image, boxes, masks, keypoints = self.execute_cpu(graph_fn, [])
# Since random_scale_crop_and_pad_to_square may prune and clip boxes,
# we only need to find one of the boxes that was not clipped and check
# that it matches the expected dimensions. Note, assertAlmostEqual(a, b)
# is equivalent to round(a-b, 7) == 0.
any_box_has_correct_size = False
effective_scale_y = int(scale * 512) / 512.0
effective_scale_x = int(scale * 256) / 512.0
expected_size_y = 0.1 * effective_scale_y
expected_size_x = 0.1 * effective_scale_x
for box in boxes:
ymin, xmin, ymax, xmax = box
any_box_has_correct_size |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_box_has_correct_size)
# Similar to the approach above where we check for at least one box with the
# expected dimensions, we check for at least one pair of keypoints whose
# distance matches the expected dimensions.
any_keypoint_pair_has_correct_dist = False
for keypoint_pair in keypoints:
ymin, xmin = keypoint_pair[0]
ymax, xmax = keypoint_pair[1]
any_keypoint_pair_has_correct_dist |= (
(round(ymin, 7) != 0.0) and (round(xmin, 7) != 0.0) and
(round(ymax, 7) != 1.0) and (round(xmax, 7) != 1.0) and
(round((ymax - ymin) - expected_size_y, 7) == 0.0) and
(round((xmax - xmin) - expected_size_x, 7) == 0.0))
self.assertTrue(any_keypoint_pair_has_correct_dist)
self.assertAlmostEqual(512.0, image.shape[0])
self.assertAlmostEqual(512.0, image.shape[1])
self.assertAllClose(image[:, :, 0],
masks[0, :, :])
......
......@@ -141,6 +141,8 @@ class DetectionResultFields(object):
for detection boxes in the image including background class.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_surface_coords: contains DensePose surface coordinates for each
box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
detection_keypoint_scores: contains detection keypoint scores.
......@@ -161,6 +163,7 @@ class DetectionResultFields(object):
detection_features = 'detection_features'
detection_classes = 'detection_classes'
detection_masks = 'detection_masks'
detection_surface_coords = 'detection_surface_coords'
detection_boundaries = 'detection_boundaries'
detection_keypoints = 'detection_keypoints'
detection_keypoint_scores = 'detection_keypoint_scores'
......@@ -182,7 +185,11 @@ class BoxListFields(object):
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoint_visibilities: keypoint visibilities per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
densepose_num_points: number of DensePose points per bounding box.
densepose_part_ids: DensePose part ids per bounding box.
densepose_surface_coords: DensePose surface coordinates per bounding box.
is_crowd: is_crowd annotation per bounding box.
"""
boxes = 'boxes'
......@@ -196,6 +203,9 @@ class BoxListFields(object):
keypoints = 'keypoints'
keypoint_visibilities = 'keypoint_visibilities'
keypoint_heatmaps = 'keypoint_heatmaps'
densepose_num_points = 'densepose_num_points'
densepose_part_ids = 'densepose_part_ids'
densepose_surface_coords = 'densepose_surface_coords'
is_crowd = 'is_crowd'
group_of = 'group_of'
......
......@@ -51,25 +51,25 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
feature_map = {
standard_fields.TfExampleFields.object_bbox_ymin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMin.as_matrix()),
filtered_data_frame_boxes.YMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmin:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMin.as_matrix()),
filtered_data_frame_boxes.XMin.to_numpy()),
standard_fields.TfExampleFields.object_bbox_ymax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.YMax.as_matrix()),
filtered_data_frame_boxes.YMax.to_numpy()),
standard_fields.TfExampleFields.object_bbox_xmax:
dataset_util.float_list_feature(
filtered_data_frame_boxes.XMax.as_matrix()),
filtered_data_frame_boxes.XMax.to_numpy()),
standard_fields.TfExampleFields.object_class_text:
dataset_util.bytes_list_feature([
six.ensure_binary(label_text)
for label_text in filtered_data_frame_boxes.LabelName.as_matrix()
for label_text in filtered_data_frame_boxes.LabelName.to_numpy()
]),
standard_fields.TfExampleFields.object_class_label:
dataset_util.int64_list_feature(
filtered_data_frame_boxes.LabelName.map(
lambda x: label_map[x]).as_matrix()),
lambda x: label_map[x]).to_numpy()),
standard_fields.TfExampleFields.filename:
dataset_util.bytes_feature(
six.ensure_binary('{}.jpg'.format(image_id))),
......@@ -82,31 +82,31 @@ def tf_example_from_annotations_data_frame(annotations_data_frame, label_map,
if 'IsGroupOf' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_group_of] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsGroupOf.as_matrix().astype(int))
filtered_data_frame_boxes.IsGroupOf.to_numpy().astype(int))
if 'IsOccluded' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_occluded] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsOccluded.as_matrix().astype(
filtered_data_frame_boxes.IsOccluded.to_numpy().astype(
int))
if 'IsTruncated' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_truncated] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsTruncated.as_matrix().astype(
filtered_data_frame_boxes.IsTruncated.to_numpy().astype(
int))
if 'IsDepiction' in filtered_data_frame.columns:
feature_map[standard_fields.TfExampleFields.
object_depiction] = dataset_util.int64_list_feature(
filtered_data_frame_boxes.IsDepiction.as_matrix().astype(
filtered_data_frame_boxes.IsDepiction.to_numpy().astype(
int))
if 'ConfidenceImageLabel' in filtered_data_frame_labels.columns:
feature_map[standard_fields.TfExampleFields.
image_class_label] = dataset_util.int64_list_feature(
filtered_data_frame_labels.LabelName.map(
lambda x: label_map[x]).as_matrix())
lambda x: label_map[x]).to_numpy())
feature_map[standard_fields.TfExampleFields
.image_class_text] = dataset_util.bytes_list_feature([
six.ensure_binary(label_text) for label_text in
filtered_data_frame_labels.LabelName.as_matrix()
filtered_data_frame_labels.LabelName.to_numpy()
])
return tf.train.Example(features=tf.train.Features(feature=feature_map))
......@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf1/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install --user -U pip
RUN python -m pip install --user .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
# TensorFlow Object Detection on Docker
These instructions are experimental.
......@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/1.15/Dockerfile -t od .
docker build -f research/object_detection/dockerfiles/tf1/Dockerfile -t od .
docker run -it od
```
......@@ -25,20 +25,17 @@ RUN useradd -ms /bin/bash tensorflow
USER tensorflow
WORKDIR /home/tensorflow
# Install pip dependencies
RUN pip3 install --user absl-py
RUN pip3 install --user contextlib2
RUN pip3 install --user Cython
RUN pip3 install --user jupyter
RUN pip3 install --user matplotlib
RUN pip3 install --user pycocotools
RUN pip3 install --user tf-slim
# Copy this version of the model garden into the image
COPY --chown=tensorflow . /home/tensorflow/models
# Compile protobuf configs
RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.)
WORKDIR /home/tensorflow/models/research/
RUN cp object_detection/packages/tf2/setup.py ./
ENV PATH="/home/tensorflow/.local/bin:${PATH}"
RUN python -m pip install -U pip
RUN python -m pip install .
ENV PYTHONPATH $PYTHONPATH:/home/tensorflow/models/research/:/home/tensorflow/models/research/slim
ENV TF_CPP_MIN_LOG_LEVEL 3
# Tensorflow Object Detection on Docker
# TensorFlow Object Detection on Docker
These instructions are experimental.
......@@ -6,6 +6,6 @@ These instructions are experimental.
```bash
# From the root of the git repository
docker build -f research/object_detection/dockerfiles/2.2/Dockerfile -t od .
docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od .
docker run -it od
```
......@@ -552,7 +552,11 @@ def _resize_detection_masks(args):
detection_boxes, detection_masks, image_shape = args
detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
detection_masks, detection_boxes, image_shape[0], image_shape[1])
return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
# If the masks are currently float, binarize them. Otherwise keep them as
# integers, since they have already been thresholded.
if detection_masks_reframed.dtype == tf.float32:
detection_masks_reframed = tf.greater(detection_masks_reframed, 0.5)
return tf.cast(detection_masks_reframed, tf.uint8)
def _resize_groundtruth_masks(args):
......@@ -570,6 +574,17 @@ def _resize_groundtruth_masks(args):
return tf.cast(tf.squeeze(mask, 3), tf.uint8)
def _resize_surface_coordinate_masks(args):
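  """Resizes DensePose surface coordinates to the full image frame.

  Unstacks the V/U channels and reframes each from box-relative to absolute
  image coordinates using the same op applied to detection masks above.
  """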
detection_boxes, surface_coords, image_shape = args
surface_coords_v, surface_coords_u = tf.unstack(surface_coords, axis=-1)
surface_coords_v_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_v, detection_boxes, image_shape[0], image_shape[1])
surface_coords_u_reframed = ops.reframe_box_masks_to_image_masks(
surface_coords_u, detection_boxes, image_shape[0], image_shape[1])
return tf.stack([surface_coords_v_reframed, surface_coords_u_reframed],
axis=-1)
def _scale_keypoint_to_absolute(args):
keypoints, image_shape = args
return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])
......@@ -720,6 +735,12 @@ def result_dict_for_batched_example(images,
num_keypoints] bool tensor with keypoint visibilities (Optional).
'groundtruth_labeled_classes': [batch_size, num_classes] int64
tensor of 1-indexed classes. (Optional)
'groundtruth_dp_num_points': [batch_size, max_number_of_boxes] int32
tensor. (Optional)
'groundtruth_dp_part_ids': [batch_size, max_number_of_boxes,
max_sampled_points] int32 tensor. (Optional)
'groundtruth_dp_surface_coords': [batch_size, max_number_of_boxes,
max_sampled_points, 4] float32 tensor. (Optional)
class_agnostic: Boolean indicating whether the detections are class-agnostic
(i.e. binary). Default False.
scale_to_absolute: Boolean indicating whether boxes and keypoints should be
......@@ -747,12 +768,16 @@ def result_dict_for_batched_example(images,
'detection_scores': [batch_size, max_detections] float32 tensor of scores.
'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
classes.
'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
binarized masks, reframed to full image masks. (Optional)
'detection_masks': [batch_size, max_detections, H, W] uint8 tensor of
instance masks, reframed to full image masks. Note that these may be
binarized (e.g. {0, 1}), or may contain 1-indexed part labels. (Optional)
'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
float32 tensor containing keypoint coordinates. (Optional)
'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
float32 tensor containing keypoint scores. (Optional)
'detection_surface_coords': [batch_size, max_detections, H, W, 2] float32
tensor with normalized surface coordinates (e.g. DensePose UV
coordinates). (Optional)
'num_detections': [batch_size] int64 tensor containing number of valid
detections.
'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
......@@ -844,14 +869,21 @@ def result_dict_for_batched_example(images,
if detection_fields.detection_masks in detections:
detection_masks = detections[detection_fields.detection_masks]
# TODO(rathodv): This should be done in model's postprocess
# function ideally.
output_dict[detection_fields.detection_masks] = (
shape_utils.static_or_dynamic_map_fn(
_resize_detection_masks,
elems=[detection_boxes, detection_masks,
original_image_spatial_shapes],
dtype=tf.uint8))
if detection_fields.detection_surface_coords in detections:
detection_surface_coords = detections[
detection_fields.detection_surface_coords]
output_dict[detection_fields.detection_surface_coords] = (
shape_utils.static_or_dynamic_map_fn(
_resize_surface_coordinate_masks,
elems=[detection_boxes, detection_surface_coords,
original_image_spatial_shapes],
dtype=tf.float32))
if detection_fields.detection_keypoints in detections:
detection_keypoints = detections[detection_fields.detection_keypoints]
......@@ -1074,3 +1106,8 @@ def evaluator_options_from_eval_config(eval_config):
'recall_upper_bound': (eval_config.recall_upper_bound)
}
return evaluator_options
def has_densepose(eval_dict):
return (fields.DetectionResultFields.detection_masks in eval_dict and
fields.DetectionResultFields.detection_surface_coords in eval_dict)
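# Example (illustrative): downstream eval/visualization code can gate its
# DensePose handling on this check, e.g.:
#
#   if has_densepose(eval_dict):
#     surface_coords = eval_dict[
#         fields.DetectionResultFields.detection_surface_coords]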
......@@ -47,7 +47,7 @@ python object_detection/dataset_tools/oid_hierarchical_labels_expansion.py \
--annotation_type=2
```
1. If you are not using Tensorflow, you can run evaluation directly using your
1. If you are not using TensorFlow, you can run evaluation directly using your
algorithm's output and generated ground-truth files. {value=4}
After step 3 you produced the ground-truth files suitable for running 'OID
......@@ -73,7 +73,7 @@ For the Object Detection Track, the participants will be ranked on:
- "OpenImagesDetectionChallenge_Precision/mAP@0.5IOU"
To use evaluation within Tensorflow training, use metric name
To use evaluation within TensorFlow training, use metric name
`oid_challenge_detection_metrics` in the evaluation config.
## Instance Segmentation Track
......@@ -130,7 +130,7 @@ python object_detection/dataset_tools/oid_hierarchical_labels_expansion.py \
--annotation_type=1
```
1. If you are not using Tensorflow, you can run evaluation directly using your
1. If you are not using TensorFlow, you can run evaluation directly using your
algorithm's output and generated ground-truth files. {value=4}
```
......
......@@ -2,7 +2,7 @@
## Overview
The Tensorflow Object Detection API uses protobuf files to configure the
The TensorFlow Object Detection API uses protobuf files to configure the
training and evaluation process. The schema for the training pipeline can be
found in object_detection/protos/pipeline.proto. At a high level, the config
file is split into 5 parts:
......@@ -60,7 +60,7 @@ to a value suited for the dataset the user is training on.
## Defining Inputs
The Tensorflow Object Detection API accepts inputs in the TFRecord file format.
The TensorFlow Object Detection API accepts inputs in the TFRecord file format.
Users must specify the locations of both the training and evaluation files.
Additionally, users should also specify a label map, which defines the mapping
between a class id and class name. The label map should be identical between
......@@ -126,24 +126,6 @@ data_augmentation_options {
}
```
### Model Parameter Initialization
While optional, it is highly recommended that users utilize other object
detection checkpoints. Training an object detector from scratch can take days.
To speed up the training process, it is recommended that users re-use the
feature extractor parameters from a pre-existing image classification or
object detection checkpoint. `train_config` provides two fields to specify
pre-existing checkpoints: `fine_tune_checkpoint` and
`from_detection_checkpoint`. `fine_tune_checkpoint` should provide a path to
the pre-existing checkpoint
(ie:"/usr/home/username/checkpoint/model.ckpt-#####").
`from_detection_checkpoint` is a boolean value. If false, it assumes the
checkpoint was from an object classification checkpoint. Note that starting
from a detection checkpoint will usually result in a faster training job than
a classification checkpoint.
The list of provided checkpoints can be found [here](detection_model_zoo.md).
### Input Preprocessing
The `data_augmentation_options` in `train_config` can be used to specify
......
# Context R-CNN
[![TensorFlow 1.15](https://img.shields.io/badge/TensorFlow-1.15-FF6F00?logo=tensorflow)](https://github.com/tensorflow/tensorflow/releases/tag/v1.15.0)
Context R-CNN is an object detection model that uses contextual features to
improve detection accuracy. See https://arxiv.org/abs/1912.03538 for more details.
......
......@@ -2,14 +2,14 @@
In this section, we discuss some of the abstractions that we use
for defining detection models. If you would like to define a new model
architecture for detection and use it in the Tensorflow Detection API,
architecture for detection and use it in the TensorFlow Detection API,
then this section should also serve as a high level guide to the files that you
will need to edit to get your new model working.
## DetectionModels (`object_detection/core/model.py`)
In order to be trained, evaluated, and exported for serving using our
provided binaries, all models under the Tensorflow Object Detection API must
provided binaries, all models under the TensorFlow Object Detection API must
implement the `DetectionModel` interface (see the full definition in `object_detection/core/model.py`). In particular,
each of these models is responsible for implementing 5 functions:
......@@ -20,7 +20,7 @@ each of these models are responsible for implementing 5 functions:
postprocess functions.
* `postprocess`: Convert predicted output tensors to final detections.
* `loss`: Compute scalar loss tensors with respect to provided groundtruth.
* `restore`: Load a checkpoint into the Tensorflow graph.
* `restore`: Load a checkpoint into the TensorFlow graph.
Given a `DetectionModel` at training time, we pass each image batch through
the following sequence of functions to compute a loss which can be optimized via
......@@ -87,7 +87,7 @@ functions:
* `_extract_box_classifier_features`: Extract second stage Box Classifier
features.
* `restore_from_classification_checkpoint_fn`: Load a checkpoint into the
Tensorflow graph.
TensorFlow graph.
See the `object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py`
definition as one example. Some remarks:
......
# Supported object detection evaluation protocols
The Tensorflow Object Detection API currently supports three evaluation protocols,
The TensorFlow Object Detection API currently supports three evaluation protocols
that can be configured in `EvalConfig` by setting `metrics_set` to the
corresponding value.
......