Unverified Commit 44f6d511 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'tensorflow:master' into panoptic-deeplab

parents 686a287d 8bc5a1a5
# --experiment_type=retinanet_mobile_coco # --experiment_type=retinanet_mobile_coco
# COCO mAP: 27.6
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16' mixed_precision_dtype: 'bfloat16'
...@@ -26,7 +27,7 @@ task: ...@@ -26,7 +27,7 @@ task:
max_level: 7 max_level: 7
min_level: 3 min_level: 3
norm_activation: norm_activation:
activation: 'swish' activation: 'hard_swish'
norm_epsilon: 0.001 norm_epsilon: 0.001
norm_momentum: 0.99 norm_momentum: 0.99
use_sync_bn: true use_sync_bn: true
......
# --experiment_type=retinanet_mobile_coco # --experiment_type=retinanet_mobile_coco
# COCO mAP: 23.5
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16' mixed_precision_dtype: 'bfloat16'
...@@ -26,7 +27,7 @@ task: ...@@ -26,7 +27,7 @@ task:
max_level: 7 max_level: 7
min_level: 3 min_level: 3
norm_activation: norm_activation:
activation: 'swish' activation: 'hard_swish'
norm_epsilon: 0.001 norm_epsilon: 0.001
norm_momentum: 0.99 norm_momentum: 0.99
use_sync_bn: true use_sync_bn: true
......
# --experiment_type=retinanet_mobile_coco # --experiment_type=retinanet_mobile_coco
# COCO mAP: 16.8
runtime: runtime:
distribution_strategy: 'tpu' distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16' mixed_precision_dtype: 'bfloat16'
...@@ -26,7 +27,7 @@ task: ...@@ -26,7 +27,7 @@ task:
max_level: 7 max_level: 7
min_level: 3 min_level: 3
norm_activation: norm_activation:
activation: 'swish' activation: 'hard_swish'
norm_epsilon: 0.001 norm_epsilon: 0.001
norm_momentum: 0.99 norm_momentum: 0.99
use_sync_bn: true use_sync_bn: true
......
...@@ -126,13 +126,6 @@ class DetectionGenerator(hyperparams.Config): ...@@ -126,13 +126,6 @@ class DetectionGenerator(hyperparams.Config):
tflite_post_processing: common.TFLitePostProcessingConfig = common.TFLitePostProcessingConfig( tflite_post_processing: common.TFLitePostProcessingConfig = common.TFLitePostProcessingConfig(
) )
max_detections: int = 200
max_classes_per_detection: int = 5
# Regular NMS run in a multi-class fashion and is slow. Setting it to False
# uses class-agnostic NMS, which is faster.
use_regular_nms: bool = False
nms_score_threshold: float = 0.1
@dataclasses.dataclass @dataclasses.dataclass
class RetinaNet(hyperparams.Config): class RetinaNet(hyperparams.Config):
...@@ -174,6 +167,10 @@ class RetinaNetTask(cfg.TaskConfig): ...@@ -174,6 +167,10 @@ class RetinaNetTask(cfg.TaskConfig):
# If set, the Waymo Open Dataset evaluator would be used. # If set, the Waymo Open Dataset evaluator would be used.
use_wod_metrics: bool = False use_wod_metrics: bool = False
# If set, freezes the backbone during training.
# TODO(crisnv) Add paper link when available.
freeze_backbone: bool = False
@exp_factory.register_config_factory('retinanet') @exp_factory.register_config_factory('retinanet')
def retinanet() -> cfg.ExperimentConfig: def retinanet() -> cfg.ExperimentConfig:
......
...@@ -68,6 +68,11 @@ flags.DEFINE_boolean( ...@@ -68,6 +68,11 @@ flags.DEFINE_boolean(
'default: False.') 'default: False.')
flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file') flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file')
flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.') flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.')
# Worker-process count that is forwarded to
# tfrecord_lib.write_tf_record_dataset as `multiple_processes`.
# The default of None makes that function build `mp.Pool(processes=None)`;
# a value of 0 makes it skip the pool and process examples in-process.
_NUM_PROCESSES = flags.DEFINE_integer(
'num_processes', None,
('Number of parallel processes to use. '
'If set to 0, disables multi-processing.'))
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
...@@ -518,7 +523,8 @@ def _create_tf_record_from_coco_annotations(images_info_file, ...@@ -518,7 +523,8 @@ def _create_tf_record_from_coco_annotations(images_info_file,
include_masks=include_masks) include_masks=include_masks)
num_skipped = tfrecord_lib.write_tf_record_dataset( num_skipped = tfrecord_lib.write_tf_record_dataset(
output_path, coco_annotations_iter, create_tf_example, num_shards) output_path, coco_annotations_iter, create_tf_example, num_shards,
multiple_processes=_NUM_PROCESSES.value)
logging.info('Finished writing, skipped %d annotations.', num_skipped) logging.info('Finished writing, skipped %d annotations.', num_skipped)
......
...@@ -26,6 +26,9 @@ import tensorflow as tf ...@@ -26,6 +26,9 @@ import tensorflow as tf
import multiprocessing as mp import multiprocessing as mp
# Progress-logging interval: write_tf_record_dataset logs 'On image %d' for
# every example whose index is a multiple of this value.
LOG_EVERY = 100
def convert_to_feature(value, value_type=None): def convert_to_feature(value, value_type=None):
"""Converts the given python object to a tf.train.Feature. """Converts the given python object to a tf.train.Feature.
...@@ -114,7 +117,7 @@ def encode_mask_as_png(mask): ...@@ -114,7 +117,7 @@ def encode_mask_as_png(mask):
def write_tf_record_dataset(output_path, annotation_iterator, def write_tf_record_dataset(output_path, annotation_iterator,
process_func, num_shards, process_func, num_shards,
use_multiprocessing=True, unpack_arguments=True): multiple_processes=None, unpack_arguments=True):
"""Iterates over annotations, processes them and writes into TFRecords. """Iterates over annotations, processes them and writes into TFRecords.
Args: Args:
...@@ -125,7 +128,10 @@ def write_tf_record_dataset(output_path, annotation_iterator, ...@@ -125,7 +128,10 @@ def write_tf_record_dataset(output_path, annotation_iterator,
annotation_iterator as arguments and returns a tuple of (tf.train.Example, annotation_iterator as arguments and returns a tuple of (tf.train.Example,
int). The integer indicates the number of annotations that were skipped. int). The integer indicates the number of annotations that were skipped.
num_shards: int, the number of shards to write for the dataset. num_shards: int, the number of shards to write for the dataset.
use_multiprocessing: multiple_processes: integer, the number of multiple parallel processes to
use. If None, uses multi-processing with number of processes equal to
`os.cpu_count()`, which is Python's default behavior. If set to 0,
multi-processing is disabled.
Whether or not to use multiple processes to write TF Records. Whether or not to use multiple processes to write TF Records.
unpack_arguments: unpack_arguments:
Whether to unpack the tuples from annotation_iterator as individual Whether to unpack the tuples from annotation_iterator as individual
...@@ -143,8 +149,9 @@ def write_tf_record_dataset(output_path, annotation_iterator, ...@@ -143,8 +149,9 @@ def write_tf_record_dataset(output_path, annotation_iterator,
total_num_annotations_skipped = 0 total_num_annotations_skipped = 0
if use_multiprocessing: if multiple_processes is None or multiple_processes > 0:
pool = mp.Pool() pool = mp.Pool(
processes=multiple_processes)
if unpack_arguments: if unpack_arguments:
tf_example_iterator = pool.starmap(process_func, annotation_iterator) tf_example_iterator = pool.starmap(process_func, annotation_iterator)
else: else:
...@@ -157,13 +164,13 @@ def write_tf_record_dataset(output_path, annotation_iterator, ...@@ -157,13 +164,13 @@ def write_tf_record_dataset(output_path, annotation_iterator,
for idx, (tf_example, num_annotations_skipped) in enumerate( for idx, (tf_example, num_annotations_skipped) in enumerate(
tf_example_iterator): tf_example_iterator):
if idx % 100 == 0: if idx % LOG_EVERY == 0:
logging.info('On image %d', idx) logging.info('On image %d', idx)
total_num_annotations_skipped += num_annotations_skipped total_num_annotations_skipped += num_annotations_skipped
writers[idx % num_shards].write(tf_example.SerializeToString()) writers[idx % num_shards].write(tf_example.SerializeToString())
if use_multiprocessing: if multiple_processes is None or multiple_processes > 0:
pool.close() pool.close()
pool.join() pool.join()
......
...@@ -47,7 +47,7 @@ class TfrecordLibTest(parameterized.TestCase): ...@@ -47,7 +47,7 @@ class TfrecordLibTest(parameterized.TestCase):
path = os.path.join(FLAGS.test_tmpdir, 'train') path = os.path.join(FLAGS.test_tmpdir, 'train')
tfrecord_lib.write_tf_record_dataset( tfrecord_lib.write_tf_record_dataset(
path, data, process_sample, 3, use_multiprocessing=False) path, data, process_sample, 3, multiple_processes=0)
tfrecord_files = tf.io.gfile.glob(path + '*') tfrecord_files = tf.io.gfile.glob(path + '*')
self.assertLen(tfrecord_files, 3) self.assertLen(tfrecord_files, 3)
......
...@@ -277,12 +277,12 @@ class PanopticQuality: ...@@ -277,12 +277,12 @@ class PanopticQuality:
np.sum(in_category_set.astype(np.int32)), np.sum(in_category_set.astype(np.int32)),
}) })
else: else:
results[category_set_name] = { results.update({
f'{category_set_name}_pq': 0., f'{category_set_name}_pq': 0.,
f'{category_set_name}_sq': 0., f'{category_set_name}_sq': 0.,
f'{category_set_name}_rq': 0., f'{category_set_name}_rq': 0.,
f'{category_set_name}_num_categories': 0 f'{category_set_name}_num_categories': 0
} })
return results return results
......
...@@ -33,14 +33,13 @@ class SegmentationLoss: ...@@ -33,14 +33,13 @@ class SegmentationLoss:
self._use_groundtruth_dimension = use_groundtruth_dimension self._use_groundtruth_dimension = use_groundtruth_dimension
self._label_smoothing = label_smoothing self._label_smoothing = label_smoothing
def __call__(self, logits, labels): def __call__(self, logits, labels, **kwargs):
_, height, width, num_classes = logits.get_shape().as_list() _, height, width, num_classes = logits.get_shape().as_list()
if self._use_groundtruth_dimension: if self._use_groundtruth_dimension:
# TODO(arashwan): Test using align corners to match deeplab alignment. # TODO(arashwan): Test using align corners to match deeplab alignment.
logits = tf.image.resize( logits = tf.image.resize(
logits, tf.shape(labels)[1:3], logits, tf.shape(labels)[1:3], method=tf.image.ResizeMethod.BILINEAR)
method=tf.image.ResizeMethod.BILINEAR)
else: else:
labels = tf.image.resize( labels = tf.image.resize(
labels, (height, width), labels, (height, width),
...@@ -54,11 +53,9 @@ class SegmentationLoss: ...@@ -54,11 +53,9 @@ class SegmentationLoss:
labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3) labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3) valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)
onehot_labels = tf.one_hot(labels, num_classes)
onehot_labels = onehot_labels * (
1 - self._label_smoothing) + self._label_smoothing / num_classes
cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits( cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
labels=onehot_labels, logits=logits) labels=self.get_labels_with_prob(labels, logits, **kwargs),
logits=logits)
if not self._class_weights: if not self._class_weights:
class_weights = [1] * num_classes class_weights = [1] * num_classes
...@@ -90,6 +87,26 @@ class SegmentationLoss: ...@@ -90,6 +87,26 @@ class SegmentationLoss:
return loss return loss
def get_labels_with_prob(self, labels, logits, **unused_kwargs):
  """Builds the per-pixel class-probability targets fed to the loss.

  The label map is one-hot encoded and then label-smoothed with
  `self._label_smoothing`: with smoothing `s` and `num_classes` classes, the
  labelled class receives `1 - s + s / num_classes` and every other class
  receives `s / num_classes`.

  Subclasses may override this method to customize the loss targets.

  Args:
    labels: An integer tensor in shape (batch_size, height, width) holding the
      ground-truth label map. `__call__` casts it to int32 and squeezes the
      channel axis before passing it here.
    logits: A float tensor in shape (batch_size, height, width, num_classes)
      which is the output of the network. Only its static last dimension is
      read, to obtain `num_classes`.
    **unused_kwargs: Unused keyword arguments.

  Returns:
    A float tensor in shape (batch_size, height, width, num_classes).
  """
  smoothing = self._label_smoothing
  depth = logits.get_shape().as_list()[-1]
  hard_targets = tf.one_hot(labels, depth)
  return hard_targets * (1 - smoothing) + smoothing / depth
def get_actual_mask_scores(logits, labels, ignore_label): def get_actual_mask_scores(logits, labels, ignore_label):
"""Gets actual mask scores.""" """Gets actual mask scores."""
...@@ -97,8 +114,7 @@ def get_actual_mask_scores(logits, labels, ignore_label): ...@@ -97,8 +114,7 @@ def get_actual_mask_scores(logits, labels, ignore_label):
batch_size = tf.shape(logits)[0] batch_size = tf.shape(logits)[0]
logits = tf.stop_gradient(logits) logits = tf.stop_gradient(logits)
labels = tf.image.resize( labels = tf.image.resize(
labels, (height, width), labels, (height, width), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
predicted_labels = tf.argmax(logits, -1, output_type=tf.int32) predicted_labels = tf.argmax(logits, -1, output_type=tf.int32)
flat_predictions = tf.reshape(predicted_labels, [batch_size, -1]) flat_predictions = tf.reshape(predicted_labels, [batch_size, -1])
flat_labels = tf.cast(tf.reshape(labels, [batch_size, -1]), tf.int32) flat_labels = tf.cast(tf.reshape(labels, [batch_size, -1]), tf.int32)
......
...@@ -243,6 +243,7 @@ class SpineNetMobile(tf.keras.Model): ...@@ -243,6 +243,7 @@ class SpineNetMobile(tf.keras.Model):
in_filters=in_filters, in_filters=in_filters,
out_filters=out_filters, out_filters=out_filters,
strides=strides, strides=strides,
se_gating_activation='hard_sigmoid',
se_ratio=se_ratio, se_ratio=se_ratio,
expand_ratio=expand_ratio, expand_ratio=expand_ratio,
stochastic_depth_drop_rate=stochastic_depth_drop_rate, stochastic_depth_drop_rate=stochastic_depth_drop_rate,
...@@ -364,15 +365,21 @@ class SpineNetMobile(tf.keras.Model): ...@@ -364,15 +365,21 @@ class SpineNetMobile(tf.keras.Model):
parent_weights = [ parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format( tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))] i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = layers.Add()(parent_weights) weights_sum = parent_weights[0]
for adder in parent_weights[1:]:
weights_sum = layers.Add()([weights_sum, adder])
parents = [ parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001) parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents)) for i in range(len(parents))
] ]
# Fuse all parent nodes then build a new block. # Fuse all parent nodes then build a new block.
x = parents[0]
for adder in parents[1:]:
x = layers.Add()([x, adder])
x = tf_utils.get_activation( x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(layers.Add()(parents)) self._activation, use_keras_layer=True)(x)
x = self._block_group( x = self._block_group(
inputs=x, inputs=x,
in_filters=target_num_filters, in_filters=target_num_filters,
......
...@@ -233,8 +233,9 @@ class SegmentationHead(tf.keras.layers.Layer): ...@@ -233,8 +233,9 @@ class SegmentationHead(tf.keras.layers.Layer):
prediction layer. prediction layer.
upsample_factor: An `int` number to specify the upsampling factor to upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied. generate finer mask. Default 1 means no upsampling is applied.
feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`, feature_fusion: One of the constants in nn_layers.FeatureFusion, namely
`panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from `deeplabv3plus`, `pyramid_fusion`, `panoptic_fpn_fusion`,
`deeplabv3plus_sum_to_merge`, or None. If `deeplabv3plus`, features from
decoder_features[level] will be fused with low level feature maps from decoder_features[level] will be fused with low level feature maps from
backbone. If `pyramid_fusion`, multiscale features will be resized and backbone. If `pyramid_fusion`, multiscale features will be resized and
fused at the target level. fused at the target level.
...@@ -245,10 +246,12 @@ class SegmentationHead(tf.keras.layers.Layer): ...@@ -245,10 +246,12 @@ class SegmentationHead(tf.keras.layers.Layer):
feature fusion. It is only used when feature_fusion is set to feature fusion. It is only used when feature_fusion is set to
`panoptic_fpn_fusion`. `panoptic_fpn_fusion`.
low_level: An `int` of backbone level to be used for feature fusion. It is low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`. used when feature_fusion is set to `deeplabv3plus` or
`deeplabv3plus_sum_to_merge`.
low_level_num_filters: An `int` of reduced number of filters for the low low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`. used when feature_fusion is set to `deeplabv3plus` or
`deeplabv3plus_sum_to_merge`.
num_decoder_filters: An `int` of number of filters in the decoder outputs. num_decoder_filters: An `int` of number of filters in the decoder outputs.
It is only used when feature_fusion is set to `panoptic_fpn_fusion`. It is only used when feature_fusion is set to `panoptic_fpn_fusion`.
activation: A `str` that indicates which activation is used, e.g. 'relu', activation: A `str` that indicates which activation is used, e.g. 'relu',
...@@ -312,7 +315,8 @@ class SegmentationHead(tf.keras.layers.Layer): ...@@ -312,7 +315,8 @@ class SegmentationHead(tf.keras.layers.Layer):
'epsilon': self._config_dict['norm_epsilon'], 'epsilon': self._config_dict['norm_epsilon'],
} }
if self._config_dict['feature_fusion'] == 'deeplabv3plus': if self._config_dict['feature_fusion'] in {'deeplabv3plus',
'deeplabv3plus_sum_to_merge'}:
# Deeplabv3+ feature fusion layers. # Deeplabv3+ feature fusion layers.
self._dlv3p_conv = conv_op( self._dlv3p_conv = conv_op(
kernel_size=1, kernel_size=1,
...@@ -398,7 +402,8 @@ class SegmentationHead(tf.keras.layers.Layer): ...@@ -398,7 +402,8 @@ class SegmentationHead(tf.keras.layers.Layer):
backbone_output = inputs[0] backbone_output = inputs[0]
decoder_output = inputs[1] decoder_output = inputs[1]
if self._config_dict['feature_fusion'] == 'deeplabv3plus': if self._config_dict['feature_fusion'] in {'deeplabv3plus',
'deeplabv3plus_sum_to_merge'}:
# deeplabv3+ feature fusion # deeplabv3+ feature fusion
x = decoder_output[str(self._config_dict['level'])] if isinstance( x = decoder_output[str(self._config_dict['level'])] if isinstance(
decoder_output, dict) else decoder_output decoder_output, dict) else decoder_output
...@@ -410,7 +415,10 @@ class SegmentationHead(tf.keras.layers.Layer): ...@@ -410,7 +415,10 @@ class SegmentationHead(tf.keras.layers.Layer):
x = tf.image.resize( x = tf.image.resize(
x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR) x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
x = tf.cast(x, dtype=y.dtype) x = tf.cast(x, dtype=y.dtype)
x = tf.concat([x, y], axis=self._bn_axis) if self._config_dict['feature_fusion'] == 'deeplabv3plus':
x = tf.concat([x, y], axis=self._bn_axis)
else:
x = tf.keras.layers.Add()([x, y])
elif self._config_dict['feature_fusion'] == 'pyramid_fusion': elif self._config_dict['feature_fusion'] == 'pyramid_fusion':
if not isinstance(decoder_output, dict): if not isinstance(decoder_output, dict):
raise ValueError('Only support dictionary decoder_output.') raise ValueError('Only support dictionary decoder_output.')
......
...@@ -30,7 +30,9 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): ...@@ -30,7 +30,9 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
(2, 'panoptic_fpn_fusion', 2, 5), (2, 'panoptic_fpn_fusion', 2, 5),
(2, 'panoptic_fpn_fusion', 2, 6), (2, 'panoptic_fpn_fusion', 2, 6),
(3, 'panoptic_fpn_fusion', 3, 5), (3, 'panoptic_fpn_fusion', 3, 5),
(3, 'panoptic_fpn_fusion', 3, 6)) (3, 'panoptic_fpn_fusion', 3, 6),
(3, 'deeplabv3plus', 3, 6),
(3, 'deeplabv3plus_sum_to_merge', 3, 6))
def test_forward(self, level, feature_fusion, def test_forward(self, level, feature_fusion,
decoder_min_level, decoder_max_level): decoder_min_level, decoder_max_level):
backbone_features = { backbone_features = {
...@@ -52,6 +54,8 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase): ...@@ -52,6 +54,8 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
head = segmentation_heads.SegmentationHead( head = segmentation_heads.SegmentationHead(
num_classes=10, num_classes=10,
level=level, level=level,
low_level=decoder_min_level,
low_level_num_filters=64,
feature_fusion=feature_fusion, feature_fusion=feature_fusion,
decoder_min_level=decoder_min_level, decoder_min_level=decoder_min_level,
decoder_max_level=decoder_max_level, decoder_max_level=decoder_max_level,
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""Anchor box and labeler definition.""" """Anchor box and labeler definition."""
import collections import collections
from typing import Dict, Optional, Tuple
# Import libraries # Import libraries
...@@ -65,7 +66,7 @@ class Anchor(object): ...@@ -65,7 +66,7 @@ class Anchor(object):
self.image_size = image_size self.image_size = image_size
self.boxes = self._generate_boxes() self.boxes = self._generate_boxes()
def _generate_boxes(self): def _generate_boxes(self) -> tf.Tensor:
"""Generates multiscale anchor boxes. """Generates multiscale anchor boxes.
Returns: Returns:
...@@ -100,7 +101,7 @@ class Anchor(object): ...@@ -100,7 +101,7 @@ class Anchor(object):
boxes_all.append(boxes_l) boxes_all.append(boxes_l)
return tf.concat(boxes_all, axis=0) return tf.concat(boxes_all, axis=0)
def unpack_labels(self, labels): def unpack_labels(self, labels: tf.Tensor) -> Dict[str, tf.Tensor]:
"""Unpacks an array of labels into multiscales labels.""" """Unpacks an array of labels into multiscales labels."""
unpacked_labels = collections.OrderedDict() unpacked_labels = collections.OrderedDict()
count = 0 count = 0
...@@ -146,17 +147,24 @@ class AnchorLabeler(object): ...@@ -146,17 +147,24 @@ class AnchorLabeler(object):
force_match_for_each_col=True) force_match_for_each_col=True)
self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder() self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
def label_anchors(self, def label_anchors(
anchor_boxes, self,
gt_boxes, anchor_boxes: Dict[str, tf.Tensor],
gt_labels, gt_boxes: tf.Tensor,
gt_attributes=None, gt_labels: tf.Tensor,
gt_weights=None): gt_attributes: Optional[Dict[str, tf.Tensor]] = None,
gt_weights: Optional[tf.Tensor] = None
) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor], Dict[str, Dict[
str, tf.Tensor]], tf.Tensor, tf.Tensor]:
"""Labels anchors with ground truth inputs. """Labels anchors with ground truth inputs.
Args: Args:
anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes. anchor_boxes: An ordered dictionary with keys
For each row, it stores [y0, x0, y1, x1] for four corners of a box. [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of the feature pyramid at l-th
level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
four corners.
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box. For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth gt_labels: An integer tensor with shape [N, 1] representing groundtruth
...@@ -166,30 +174,29 @@ class AnchorLabeler(object): ...@@ -166,30 +174,29 @@ class AnchorLabeler(object):
representing groundtruth attributes. representing groundtruth attributes.
gt_weights: If not None, a float tensor with shape [N] representing gt_weights: If not None, a float tensor with shape [N] representing
groundtruth weights. groundtruth weights.
Returns: Returns:
cls_targets_dict: ordered dictionary with keys cls_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location]. The height_l and shape [height_l, width_l, num_anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level. width_l represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys box_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of bounding box regression output at and width_l represent the dimension of bounding box regression output at
l-th level. l-th level.
attribute_targets_dict: a dict with (name, attribute_targets) pairs. Each attribute_targets_dict: A dict with (name, attribute_targets) pairs. Each
`attribute_targets` represents an ordered dictionary with keys `attribute_targets` represents an ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * attribute_size]. shape [height_l, width_l, num_anchors_per_location * attribute_size].
The height_l and width_l represent the dimension of attribute prediction The height_l and width_l represent the dimension of attribute prediction
output at l-th level. output at l-th level.
cls_weights: A flattened Tensor with shape [batch_size, num_anchors], that cls_weights: A flattened Tensor with shape [num_anchors], that serves as
serves as masking / sample weight for classification loss. Its value masking / sample weight for classification loss. Its value is 1.0 for
is 1.0 for positive and negative matched anchors, and 0.0 for ignored positive and negative matched anchors, and 0.0 for ignored anchors.
anchors. box_weights: A flattened Tensor with shape [num_anchors], that serves as
box_weights: A flattened Tensor with shape [batch_size, num_anchors], that masking / sample weight for regression loss. Its value is 1.0 for
serves as masking / sample weight for regression loss. Its value is positive matched anchors, and 0.0 for negative and ignored anchors.
1.0 for positive matched anchors, and 0.0 for negative and ignored
anchors.
""" """
flattened_anchor_boxes = [] flattened_anchor_boxes = []
for anchors in anchor_boxes.values(): for anchors in anchor_boxes.values():
...@@ -286,25 +293,33 @@ class RpnAnchorLabeler(AnchorLabeler): ...@@ -286,25 +293,33 @@ class RpnAnchorLabeler(AnchorLabeler):
return (ignore_labels + positive_labels + negative_labels, return (ignore_labels + positive_labels + negative_labels,
positive_labels, negative_labels) positive_labels, negative_labels)
def label_anchors(self, anchor_boxes, gt_boxes, gt_labels): def label_anchors(
self, anchor_boxes: Dict[str, tf.Tensor], gt_boxes: tf.Tensor,
gt_labels: tf.Tensor
) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor]]:
"""Labels anchors with ground truth inputs. """Labels anchors with ground truth inputs.
Args: Args:
anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes. anchor_boxes: An ordered dictionary with keys
For each row, it stores [y0, x0, y1, x1] for four corners of a box. [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of the feature pyramid at l-th
level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
four corners.
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes. gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box. For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes. classes.
Returns: Returns:
score_targets_dict: ordered dictionary with keys score_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l shape [height_l, width_l, num_anchors_per_location]. The height_l and
represent the dimension of class logits at l-th level. width_l represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys box_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
width_l represent the dimension of bounding box regression output at and width_l represent the dimension of bounding box regression output at
l-th level. l-th level.
""" """
flattened_anchor_boxes = [] flattened_anchor_boxes = []
...@@ -362,8 +377,27 @@ def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios, ...@@ -362,8 +377,27 @@ def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios,
return anchor_gen return anchor_gen
def unpack_targets(targets, anchor_boxes_dict): def unpack_targets(
"""Unpacks an array of labels into multiscales labels.""" targets: tf.Tensor,
anchor_boxes_dict: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
"""Unpacks an array of labels into multiscales labels.
Args:
targets: A tensor with shape [num_anchors, M] representing the packed
targets with M values stored for each anchor.
anchor_boxes_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensors with shape
[height_l, width_l, num_anchors_per_location * 4]. The height_l and
width_l represent the dimension of the feature pyramid at l-th level. For
each anchor box, the tensor stores [y0, x0, y1, x1] for the four corners.
Returns:
unpacked_targets: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensors with shape
[height_l, width_l, num_anchors_per_location * M]. The height_l and
width_l represent the dimension of the feature pyramid at l-th level. M is
the number of values stored for each anchor.
"""
unpacked_targets = collections.OrderedDict() unpacked_targets = collections.OrderedDict()
count = 0 count = 0
for level, anchor_boxes in anchor_boxes_dict.items(): for level, anchor_boxes in anchor_boxes_dict.items():
......
...@@ -32,7 +32,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta): ...@@ -32,7 +32,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_image_size: List[int], input_image_size: List[int],
input_type: str = 'image_tensor', input_type: str = 'image_tensor',
num_channels: int = 3, num_channels: int = 3,
model: Optional[tf.keras.Model] = None): model: Optional[tf.keras.Model] = None,
input_name: Optional[str] = None):
"""Initializes a module for export. """Initializes a module for export.
Args: Args:
...@@ -43,12 +44,14 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta): ...@@ -43,12 +44,14 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_type: The input signature type. input_type: The input signature type.
num_channels: The number of the image channels. num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported. model: A tf.keras.Model instance to be exported.
input_name: A customized input tensor name.
""" """
self.params = params self.params = params
self._batch_size = batch_size self._batch_size = batch_size
self._input_image_size = input_image_size self._input_image_size = input_image_size
self._num_channels = num_channels self._num_channels = num_channels
self._input_type = input_type self._input_type = input_type
self._input_name = input_name
if model is None: if model is None:
model = self._build_model() # pylint: disable=assignment-from-none model = self._build_model() # pylint: disable=assignment-from-none
super().__init__(params=params, model=model) super().__init__(params=params, model=model)
...@@ -163,19 +166,20 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta): ...@@ -163,19 +166,20 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_signature = tf.TensorSpec( input_signature = tf.TensorSpec(
shape=[self._batch_size] + [None] * len(self._input_image_size) + shape=[self._batch_size] + [None] * len(self._input_image_size) +
[self._num_channels], [self._num_channels],
dtype=tf.uint8) dtype=tf.uint8,
name=self._input_name)
signatures[ signatures[
def_name] = self.inference_from_image_tensors.get_concrete_function( def_name] = self.inference_from_image_tensors.get_concrete_function(
input_signature) input_signature)
elif key == 'image_bytes': elif key == 'image_bytes':
input_signature = tf.TensorSpec( input_signature = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string) shape=[self._batch_size], dtype=tf.string, name=self._input_name)
signatures[ signatures[
def_name] = self.inference_from_image_bytes.get_concrete_function( def_name] = self.inference_from_image_bytes.get_concrete_function(
input_signature) input_signature)
elif key == 'serve_examples' or key == 'tf_example': elif key == 'serve_examples' or key == 'tf_example':
input_signature = tf.TensorSpec( input_signature = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string) shape=[self._batch_size], dtype=tf.string, name=self._input_name)
signatures[ signatures[
def_name] = self.inference_from_tf_example.get_concrete_function( def_name] = self.inference_from_tf_example.get_concrete_function(
input_signature) input_signature)
...@@ -183,7 +187,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta): ...@@ -183,7 +187,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_signature = tf.TensorSpec( input_signature = tf.TensorSpec(
shape=[self._batch_size] + self._input_image_size + shape=[self._batch_size] + self._input_image_size +
[self._num_channels], [self._num_channels],
dtype=tf.float32) dtype=tf.float32,
name=self._input_name)
signatures[def_name] = self.inference_for_tflite.get_concrete_function( signatures[def_name] = self.inference_for_tflite.get_concrete_function(
input_signature) input_signature)
else: else:
......
...@@ -45,11 +45,12 @@ from official.vision.serving import export_saved_model_lib ...@@ -45,11 +45,12 @@ from official.vision.serving import export_saved_model_lib
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
flags.DEFINE_string('experiment', None, _EXPERIMENT = flags.DEFINE_string(
'experiment type, e.g. retinanet_resnetfpn_coco') 'experiment', None, 'experiment type, e.g. retinanet_resnetfpn_coco')
flags.DEFINE_string('export_dir', None, 'The export directory.') _EXPORT_DIR = flags.DEFINE_string('export_dir', None, 'The export directory.')
flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path.') _CHECKPOINT_PATH = flags.DEFINE_string('checkpoint_path', None,
flags.DEFINE_multi_string( 'Checkpoint path.')
_CONFIG_FILE = flags.DEFINE_multi_string(
'config_file', 'config_file',
default=None, default=None,
help='YAML/JSON files which specifies overrides. The override order ' help='YAML/JSON files which specifies overrides. The override order '
...@@ -58,49 +59,57 @@ flags.DEFINE_multi_string( ...@@ -58,49 +59,57 @@ flags.DEFINE_multi_string(
'specified in Python. If the same parameter is specified in both ' 'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used ' '`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.') 'first, followed by params_override.')
flags.DEFINE_string( _PARAMS_OVERRIDE = flags.DEFINE_string(
'params_override', '', 'params_override', '',
'The JSON/YAML file or string which specifies the parameter to be overriden' 'The JSON/YAML file or string which specifies the parameter to be overriden'
' on top of `config_file` template.') ' on top of `config_file` template.')
flags.DEFINE_integer('batch_size', None, 'The batch size.') _BATCH_SIZSE = flags.DEFINE_integer('batch_size', None, 'The batch size.')
flags.DEFINE_string( _IMAGE_TYPE = flags.DEFINE_string(
'input_type', 'image_tensor', 'input_type', 'image_tensor',
'One of `image_tensor`, `image_bytes`, `tf_example` and `tflite`.') 'One of `image_tensor`, `image_bytes`, `tf_example` and `tflite`.')
flags.DEFINE_string( _INPUT_IMAGE_SIZE = flags.DEFINE_string(
'input_image_size', '224,224', 'input_image_size', '224,224',
'The comma-separated string of two integers representing the height,width ' 'The comma-separated string of two integers representing the height,width '
'of the input to the model.') 'of the input to the model.')
flags.DEFINE_string('export_checkpoint_subdir', 'checkpoint', _EXPORT_CHECKPOINT_SUBDIR = flags.DEFINE_string(
'The subdirectory for checkpoints.') 'export_checkpoint_subdir', 'checkpoint',
flags.DEFINE_string('export_saved_model_subdir', 'saved_model', 'The subdirectory for checkpoints.')
'The subdirectory for saved model.') _EXPORT_SAVED_MODEL_SUBDIR = flags.DEFINE_string(
flags.DEFINE_bool('log_model_flops_and_params', False, 'export_saved_model_subdir', 'saved_model',
'If true, logs model flops and parameters.') 'The subdirectory for saved model.')
_LOG_MODEL_FLOPS_AND_PARAMS = flags.DEFINE_bool(
'log_model_flops_and_params', False,
'If true, logs model flops and parameters.')
_INPUT_NAME = flags.DEFINE_string(
'input_name', None,
'Input tensor name in signature def. Default at None which'
'produces input tensor name `inputs`.')
def main(_): def main(_):
params = exp_factory.get_exp_config(FLAGS.experiment) params = exp_factory.get_exp_config(_EXPERIMENT.value)
for config_file in FLAGS.config_file or []: for config_file in _CONFIG_FILE.value or []:
params = hyperparams.override_params_dict( params = hyperparams.override_params_dict(
params, config_file, is_strict=True) params, config_file, is_strict=True)
if FLAGS.params_override: if _PARAMS_OVERRIDE.value:
params = hyperparams.override_params_dict( params = hyperparams.override_params_dict(
params, FLAGS.params_override, is_strict=True) params, _PARAMS_OVERRIDE.value, is_strict=True)
params.validate() params.validate()
params.lock() params.lock()
export_saved_model_lib.export_inference_graph( export_saved_model_lib.export_inference_graph(
input_type=FLAGS.input_type, input_type=_IMAGE_TYPE.value,
batch_size=FLAGS.batch_size, batch_size=_BATCH_SIZSE.value,
input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')], input_image_size=[int(x) for x in _INPUT_IMAGE_SIZE.value.split(',')],
params=params, params=params,
checkpoint_path=FLAGS.checkpoint_path, checkpoint_path=_CHECKPOINT_PATH.value,
export_dir=FLAGS.export_dir, export_dir=_EXPORT_DIR.value,
export_checkpoint_subdir=FLAGS.export_checkpoint_subdir, export_checkpoint_subdir=_EXPORT_CHECKPOINT_SUBDIR.value,
export_saved_model_subdir=FLAGS.export_saved_model_subdir, export_saved_model_subdir=_EXPORT_SAVED_MODEL_SUBDIR.value,
log_model_flops_and_params=FLAGS.log_model_flops_and_params) log_model_flops_and_params=_LOG_MODEL_FLOPS_AND_PARAMS.value,
input_name=_INPUT_NAME.value)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -43,7 +43,8 @@ def export_inference_graph( ...@@ -43,7 +43,8 @@ def export_inference_graph(
export_saved_model_subdir: Optional[str] = None, export_saved_model_subdir: Optional[str] = None,
save_options: Optional[tf.saved_model.SaveOptions] = None, save_options: Optional[tf.saved_model.SaveOptions] = None,
log_model_flops_and_params: bool = False, log_model_flops_and_params: bool = False,
checkpoint: Optional[tf.train.Checkpoint] = None): checkpoint: Optional[tf.train.Checkpoint] = None,
input_name: Optional[str] = None):
"""Exports inference graph for the model specified in the exp config. """Exports inference graph for the model specified in the exp config.
Saved model is stored at export_dir/saved_model, checkpoint is saved Saved model is stored at export_dir/saved_model, checkpoint is saved
...@@ -69,6 +70,8 @@ def export_inference_graph( ...@@ -69,6 +70,8 @@ def export_inference_graph(
and model parameters to model_params.txt. and model parameters to model_params.txt.
checkpoint: An optional tf.train.Checkpoint. If provided, the export module checkpoint: An optional tf.train.Checkpoint. If provided, the export module
will use it to read the weights. will use it to read the weights.
input_name: The input tensor name. Defaults to `None`, which produces the
input tensor name `inputs`.
""" """
if export_checkpoint_subdir: if export_checkpoint_subdir:
...@@ -92,7 +95,8 @@ def export_inference_graph( ...@@ -92,7 +95,8 @@ def export_inference_graph(
batch_size=batch_size, batch_size=batch_size,
input_image_size=input_image_size, input_image_size=input_image_size,
input_type=input_type, input_type=input_type,
num_channels=num_channels) num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance( elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask): params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule( export_module = detection.DetectionModule(
...@@ -100,7 +104,8 @@ def export_inference_graph( ...@@ -100,7 +104,8 @@ def export_inference_graph(
batch_size=batch_size, batch_size=batch_size,
input_image_size=input_image_size, input_image_size=input_image_size,
input_type=input_type, input_type=input_type,
num_channels=num_channels) num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task, elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask): configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule( export_module = semantic_segmentation.SegmentationModule(
...@@ -108,7 +113,8 @@ def export_inference_graph( ...@@ -108,7 +113,8 @@ def export_inference_graph(
batch_size=batch_size, batch_size=batch_size,
input_image_size=input_image_size, input_image_size=input_image_size,
input_type=input_type, input_type=input_type,
num_channels=num_channels) num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task, elif isinstance(params.task,
configs.video_classification.VideoClassificationTask): configs.video_classification.VideoClassificationTask):
export_module = video_classification.VideoClassificationModule( export_module = video_classification.VideoClassificationModule(
...@@ -116,7 +122,8 @@ def export_inference_graph( ...@@ -116,7 +122,8 @@ def export_inference_graph(
batch_size=batch_size, batch_size=batch_size,
input_image_size=input_image_size, input_image_size=input_image_size,
input_type=input_type, input_type=input_type,
num_channels=num_channels) num_channels=num_channels,
input_name=input_name)
else: else:
raise ValueError('Export module not implemented for {} task.'.format( raise ValueError('Export module not implemented for {} task.'.format(
type(params.task))) type(params.task)))
......
...@@ -59,6 +59,10 @@ class RetinaNetTask(base_task.Task): ...@@ -59,6 +59,10 @@ class RetinaNetTask(base_task.Task):
input_specs=input_specs, input_specs=input_specs,
model_config=self.task_config.model, model_config=self.task_config.model,
l2_regularizer=l2_regularizer) l2_regularizer=l2_regularizer)
if self.task_config.freeze_backbone:
model.backbone.trainable = False
return model return model
def initialize(self, model: tf.keras.Model): def initialize(self, model: tf.keras.Model):
......
...@@ -166,7 +166,7 @@ class SemanticSegmentationTask(base_task.Task): ...@@ -166,7 +166,7 @@ class SemanticSegmentationTask(base_task.Task):
**kwargs: other args. **kwargs: other args.
""" """
for metric in metrics: for metric in metrics:
if 'mask_scores_mse' is metric.name: if 'mask_scores_mse' == metric.name:
actual_mask_scores = segmentation_losses.get_actual_mask_scores( actual_mask_scores = segmentation_losses.get_actual_mask_scores(
model_outputs['logits'], labels['masks'], model_outputs['logits'], labels['masks'],
self.task_config.losses.ignore_label) self.task_config.losses.ignore_label)
......
...@@ -77,8 +77,8 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -77,8 +77,8 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.zeros(input_length, tf.int32)) tf.zeros(input_length, tf.int32))
num_sampled_pos = tf.reduce_sum( num_sampled_pos = tf.reduce_sum(
input_tensor=tf.cast(valid_positive_index, tf.int32)) input_tensor=tf.cast(valid_positive_index, tf.int32))
max_num_positive_samples = tf.constant( max_num_positive_samples = tf.cast(
int(sample_size * self._positive_fraction), tf.int32) tf.cast(sample_size, tf.float32) * self._positive_fraction, tf.int32)
num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos) num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
num_negative_samples = tf.constant(sample_size, num_negative_samples = tf.constant(sample_size,
tf.int32) - num_positive_samples tf.int32) - num_positive_samples
...@@ -219,7 +219,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -219,7 +219,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
indicator: boolean tensor of shape [N] whose True entries can be sampled. indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size. If None, keeps all positive samples and batch_size: desired batch size. If None, keeps all positive samples and
randomly selects negative samples so that the positive sample fraction randomly selects negative samples so that the positive sample fraction
matches self._positive_fraction. It cannot be None is is_static is True. matches self._positive_fraction. It cannot be None if is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples. (=False) examples.
scope: name scope. scope: name scope.
...@@ -259,7 +259,9 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -259,7 +259,9 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
max_num_pos = tf.reduce_sum( max_num_pos = tf.reduce_sum(
input_tensor=tf.cast(positive_idx, dtype=tf.int32)) input_tensor=tf.cast(positive_idx, dtype=tf.int32))
else: else:
max_num_pos = int(self._positive_fraction * batch_size) max_num_pos = tf.cast(
self._positive_fraction * tf.cast(batch_size, tf.float32),
tf.int32)
sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
num_sampled_pos = tf.reduce_sum( num_sampled_pos = tf.reduce_sum(
input_tensor=tf.cast(sampled_pos_idx, tf.int32)) input_tensor=tf.cast(sampled_pos_idx, tf.int32))
......
...@@ -54,7 +54,7 @@ def area(boxlist, scope=None): ...@@ -54,7 +54,7 @@ def area(boxlist, scope=None):
Returns: Returns:
a tensor with shape [N] representing box areas. a tensor with shape [N] representing box areas.
""" """
with tf.name_scope(scope, 'Area'): with tf.name_scope(scope or 'Area'):
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze((y_max - y_min) * (x_max - x_min), [1]) return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
...@@ -71,7 +71,7 @@ def height_width(boxlist, scope=None): ...@@ -71,7 +71,7 @@ def height_width(boxlist, scope=None):
Height: A tensor with shape [N] representing box heights. Height: A tensor with shape [N] representing box heights.
Width: A tensor with shape [N] representing box widths. Width: A tensor with shape [N] representing box widths.
""" """
with tf.name_scope(scope, 'HeightWidth'): with tf.name_scope(scope or 'HeightWidth'):
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1]) return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
...@@ -89,7 +89,7 @@ def scale(boxlist, y_scale, x_scale, scope=None): ...@@ -89,7 +89,7 @@ def scale(boxlist, y_scale, x_scale, scope=None):
Returns: Returns:
boxlist: BoxList holding N boxes boxlist: BoxList holding N boxes
""" """
with tf.name_scope(scope, 'Scale'): with tf.name_scope(scope or 'Scale'):
y_scale = tf.cast(y_scale, tf.float32) y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32) x_scale = tf.cast(x_scale, tf.float32)
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
...@@ -121,7 +121,7 @@ def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None): ...@@ -121,7 +121,7 @@ def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
Returns: Returns:
a BoxList holding M_out boxes where M_out <= M_in a BoxList holding M_out boxes where M_out <= M_in
""" """
with tf.name_scope(scope, 'ClipToWindow'): with tf.name_scope(scope or 'ClipToWindow'):
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
...@@ -160,7 +160,7 @@ def prune_outside_window(boxlist, window, scope=None): ...@@ -160,7 +160,7 @@ def prune_outside_window(boxlist, window, scope=None):
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor. in the input tensor.
""" """
with tf.name_scope(scope, 'PruneOutsideWindow'): with tf.name_scope(scope or 'PruneOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
...@@ -194,7 +194,7 @@ def prune_completely_outside_window(boxlist, window, scope=None): ...@@ -194,7 +194,7 @@ def prune_completely_outside_window(boxlist, window, scope=None):
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor. in the input tensor.
""" """
with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'): with tf.name_scope(scope or 'PruneCompleteleyOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split( y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1) value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window) win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
...@@ -220,7 +220,7 @@ def intersection(boxlist1, boxlist2, scope=None): ...@@ -220,7 +220,7 @@ def intersection(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N, M] representing pairwise intersections a tensor with shape [N, M] representing pairwise intersections
""" """
with tf.name_scope(scope, 'Intersection'): with tf.name_scope(scope or 'Intersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split( y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1) value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split( y_min2, x_min2, y_max2, x_max2 = tf.split(
...@@ -245,7 +245,7 @@ def matched_intersection(boxlist1, boxlist2, scope=None): ...@@ -245,7 +245,7 @@ def matched_intersection(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N] representing pairwise intersections a tensor with shape [N] representing pairwise intersections
""" """
with tf.name_scope(scope, 'MatchedIntersection'): with tf.name_scope(scope or 'MatchedIntersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split( y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1) value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split( y_min2, x_min2, y_max2, x_max2 = tf.split(
...@@ -270,7 +270,7 @@ def iou(boxlist1, boxlist2, scope=None): ...@@ -270,7 +270,7 @@ def iou(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N, M] representing pairwise iou scores. a tensor with shape [N, M] representing pairwise iou scores.
""" """
with tf.name_scope(scope, 'IOU'): with tf.name_scope(scope or 'IOU'):
intersections = intersection(boxlist1, boxlist2) intersections = intersection(boxlist1, boxlist2)
areas1 = area(boxlist1) areas1 = area(boxlist1)
areas2 = area(boxlist2) areas2 = area(boxlist2)
...@@ -292,7 +292,7 @@ def matched_iou(boxlist1, boxlist2, scope=None): ...@@ -292,7 +292,7 @@ def matched_iou(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N] representing pairwise iou scores. a tensor with shape [N] representing pairwise iou scores.
""" """
with tf.name_scope(scope, 'MatchedIOU'): with tf.name_scope(scope or 'MatchedIOU'):
intersections = matched_intersection(boxlist1, boxlist2) intersections = matched_intersection(boxlist1, boxlist2)
areas1 = area(boxlist1) areas1 = area(boxlist1)
areas2 = area(boxlist2) areas2 = area(boxlist2)
...@@ -317,7 +317,7 @@ def ioa(boxlist1, boxlist2, scope=None): ...@@ -317,7 +317,7 @@ def ioa(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N, M] representing pairwise ioa scores. a tensor with shape [N, M] representing pairwise ioa scores.
""" """
with tf.name_scope(scope, 'IOA'): with tf.name_scope(scope or 'IOA'):
intersections = intersection(boxlist1, boxlist2) intersections = intersection(boxlist1, boxlist2)
areas = tf.expand_dims(area(boxlist2), 0) areas = tf.expand_dims(area(boxlist2), 0)
return tf.truediv(intersections, areas) return tf.truediv(intersections, areas)
...@@ -344,7 +344,7 @@ def prune_non_overlapping_boxes(boxlist1, ...@@ -344,7 +344,7 @@ def prune_non_overlapping_boxes(boxlist1,
keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
first input BoxList `boxlist1`. first input BoxList `boxlist1`.
""" """
with tf.name_scope(scope, 'PruneNonOverlappingBoxes'): with tf.name_scope(scope or 'PruneNonOverlappingBoxes'):
ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap)) keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
...@@ -364,7 +364,7 @@ def prune_small_boxes(boxlist, min_side, scope=None): ...@@ -364,7 +364,7 @@ def prune_small_boxes(boxlist, min_side, scope=None):
Returns: Returns:
A pruned boxlist. A pruned boxlist.
""" """
with tf.name_scope(scope, 'PruneSmallBoxes'): with tf.name_scope(scope or 'PruneSmallBoxes'):
height, width = height_width(boxlist) height, width = height_width(boxlist)
is_valid = tf.logical_and( is_valid = tf.logical_and(
tf.greater_equal(width, min_side), tf.greater_equal(height, min_side)) tf.greater_equal(width, min_side), tf.greater_equal(height, min_side))
...@@ -391,7 +391,7 @@ def change_coordinate_frame(boxlist, window, scope=None): ...@@ -391,7 +391,7 @@ def change_coordinate_frame(boxlist, window, scope=None):
Returns: Returns:
Returns a BoxList object with N boxes. Returns a BoxList object with N boxes.
""" """
with tf.name_scope(scope, 'ChangeCoordinateFrame'): with tf.name_scope(scope or 'ChangeCoordinateFrame'):
win_height = window[2] - window[0] win_height = window[2] - window[0]
win_width = window[3] - window[1] win_width = window[3] - window[1]
boxlist_new = scale( boxlist_new = scale(
...@@ -423,7 +423,7 @@ def sq_dist(boxlist1, boxlist2, scope=None): ...@@ -423,7 +423,7 @@ def sq_dist(boxlist1, boxlist2, scope=None):
Returns: Returns:
a tensor with shape [N, M] representing pairwise distances a tensor with shape [N, M] representing pairwise distances
""" """
with tf.name_scope(scope, 'SqDist'): with tf.name_scope(scope or 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True) sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True) sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul( innerprod = tf.matmul(
...@@ -463,7 +463,7 @@ def boolean_mask(boxlist, ...@@ -463,7 +463,7 @@ def boolean_mask(boxlist,
Raises: Raises:
ValueError: if `indicator` is not a rank-1 boolean tensor. ValueError: if `indicator` is not a rank-1 boolean tensor.
""" """
with tf.name_scope(scope, 'BooleanMask'): with tf.name_scope(scope or 'BooleanMask'):
if indicator.shape.ndims != 1: if indicator.shape.ndims != 1:
raise ValueError('indicator should have rank 1') raise ValueError('indicator should have rank 1')
if indicator.dtype != tf.bool: if indicator.dtype != tf.bool:
...@@ -521,7 +521,7 @@ def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False): ...@@ -521,7 +521,7 @@ def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
ValueError: if specified field is not contained in boxlist or if the ValueError: if specified field is not contained in boxlist or if the
indices are not of type int32 indices are not of type int32
""" """
with tf.name_scope(scope, 'Gather'): with tf.name_scope(scope or 'Gather'):
if len(indices.shape.as_list()) != 1: if len(indices.shape.as_list()) != 1:
raise ValueError('indices should have rank 1') raise ValueError('indices should have rank 1')
if indices.dtype != tf.int32 and indices.dtype != tf.int64: if indices.dtype != tf.int32 and indices.dtype != tf.int64:
...@@ -562,7 +562,7 @@ def concatenate(boxlists, fields=None, scope=None): ...@@ -562,7 +562,7 @@ def concatenate(boxlists, fields=None, scope=None):
contains non BoxList objects), or if requested fields are not contained in contains non BoxList objects), or if requested fields are not contained in
all boxlists all boxlists
""" """
with tf.name_scope(scope, 'Concatenate'): with tf.name_scope(scope or 'Concatenate'):
if not isinstance(boxlists, list): if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list') raise ValueError('boxlists should be a list')
if not boxlists: if not boxlists:
...@@ -612,7 +612,7 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None): ...@@ -612,7 +612,7 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
ValueError: if specified field does not exist ValueError: if specified field does not exist
ValueError: if the order is not either descend or ascend ValueError: if the order is not either descend or ascend
""" """
with tf.name_scope(scope, 'SortByField'): with tf.name_scope(scope or 'SortByField'):
if order != SortOrder.descend and order != SortOrder.ascend: if order != SortOrder.descend and order != SortOrder.ascend:
raise ValueError('Invalid sort order') raise ValueError('Invalid sort order')
...@@ -653,7 +653,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None): ...@@ -653,7 +653,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
Returns: Returns:
image_and_boxes: an image tensor with shape [height, width, 3] image_and_boxes: an image tensor with shape [height, width, 3]
""" """
with tf.name_scope(scope, 'VisualizeBoxesInImage'): with tf.name_scope(scope or 'VisualizeBoxesInImage'):
if not normalized: if not normalized:
height, width, _ = tf.unstack(tf.shape(image)) height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32), boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32),
...@@ -679,7 +679,7 @@ def filter_field_value_equals(boxlist, field, value, scope=None): ...@@ -679,7 +679,7 @@ def filter_field_value_equals(boxlist, field, value, scope=None):
ValueError: if boxlist not a BoxList object or if it does not have ValueError: if boxlist not a BoxList object or if it does not have
the specified field. the specified field.
""" """
with tf.name_scope(scope, 'FilterFieldValueEquals'): with tf.name_scope(scope or 'FilterFieldValueEquals'):
if not isinstance(boxlist, box_list.BoxList): if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList') raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field(field): if not boxlist.has_field(field):
...@@ -710,7 +710,7 @@ def filter_greater_than(boxlist, thresh, scope=None): ...@@ -710,7 +710,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
ValueError: if boxlist not a BoxList object or if it does not ValueError: if boxlist not a BoxList object or if it does not
have a scores field have a scores field
""" """
with tf.name_scope(scope, 'FilterGreaterThan'): with tf.name_scope(scope or 'FilterGreaterThan'):
if not isinstance(boxlist, box_list.BoxList): if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList') raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'): if not boxlist.has_field('scores'):
...@@ -746,7 +746,7 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None): ...@@ -746,7 +746,7 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
Raises: Raises:
ValueError: if thresh is not in [0, 1] ValueError: if thresh is not in [0, 1]
""" """
with tf.name_scope(scope, 'NonMaxSuppression'): with tf.name_scope(scope or 'NonMaxSuppression'):
if not 0 <= thresh <= 1.0: if not 0 <= thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1') raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, box_list.BoxList): if not isinstance(boxlist, box_list.BoxList):
...@@ -802,7 +802,7 @@ def to_normalized_coordinates(boxlist, ...@@ -802,7 +802,7 @@ def to_normalized_coordinates(boxlist,
Returns: Returns:
boxlist with normalized coordinates in [0, 1]. boxlist with normalized coordinates in [0, 1].
""" """
with tf.name_scope(scope, 'ToNormalizedCoordinates'): with tf.name_scope(scope or 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32) height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32) width = tf.cast(width, tf.float32)
...@@ -842,7 +842,7 @@ def to_absolute_coordinates(boxlist, ...@@ -842,7 +842,7 @@ def to_absolute_coordinates(boxlist,
boxlist with absolute coordinates in terms of the image size. boxlist with absolute coordinates in terms of the image size.
""" """
with tf.name_scope(scope, 'ToAbsoluteCoordinates'): with tf.name_scope(scope or 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32) height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32) width = tf.cast(width, tf.float32)
...@@ -987,10 +987,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5): ...@@ -987,10 +987,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
# match to any boxes in pool_boxes. For such boxes without any matches, we # match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting. # should return the original boxes without voting.
match_assert = tf.Assert( match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)), [ tf.reduce_all(tf.greater(num_matches, 0)),
'Each box in selected_boxes must match with at least one box ' 'Each box in selected_boxes must match with at least one box '
'in pool_boxes.' 'in pool_boxes.')
])
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1) scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert( scores_assert = tf.Assert(
...@@ -1024,7 +1023,7 @@ def get_minimal_coverage_box(boxlist, default_box=None, scope=None): ...@@ -1024,7 +1023,7 @@ def get_minimal_coverage_box(boxlist, default_box=None, scope=None):
boxes in the box list. If the boxlist does not contain any boxes, the boxes in the box list. If the boxlist does not contain any boxes, the
default box is returned. default box is returned.
""" """
with tf.name_scope(scope, 'CreateCoverageBox'): with tf.name_scope(scope or 'CreateCoverageBox'):
num_boxes = boxlist.num_boxes() num_boxes = boxlist.num_boxes()
def coverage_box(bboxes): def coverage_box(bboxes):
...@@ -1068,7 +1067,7 @@ def sample_boxes_by_jittering(boxlist, ...@@ -1068,7 +1067,7 @@ def sample_boxes_by_jittering(boxlist,
sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
normalized coordinates. normalized coordinates.
""" """
with tf.name_scope(scope, 'SampleBoxesByJittering'): with tf.name_scope(scope or 'SampleBoxesByJittering'):
num_boxes = boxlist.num_boxes() num_boxes = boxlist.num_boxes()
box_indices = tf.random_uniform([num_boxes_to_sample], box_indices = tf.random_uniform([num_boxes_to_sample],
minval=0, minval=0,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment