Unverified Commit 44f6d511 authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'tensorflow:master' into panoptic-deeplab

parents 686a287d 8bc5a1a5
# --experiment_type=retinanet_mobile_coco
# COCO mAP: 27.6
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
......@@ -26,7 +27,7 @@ task:
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
activation: 'hard_swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
......
# --experiment_type=retinanet_mobile_coco
# COCO mAP: 23.5
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
......@@ -26,7 +27,7 @@ task:
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
activation: 'hard_swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
......
# --experiment_type=retinanet_mobile_coco
# COCO mAP: 16.8
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
......@@ -26,7 +27,7 @@ task:
max_level: 7
min_level: 3
norm_activation:
activation: 'swish'
activation: 'hard_swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
......
......@@ -126,13 +126,6 @@ class DetectionGenerator(hyperparams.Config):
tflite_post_processing: common.TFLitePostProcessingConfig = common.TFLitePostProcessingConfig(
)
max_detections: int = 200
max_classes_per_detection: int = 5
# Regular NMS run in a multi-class fashion and is slow. Setting it to False
# uses class-agnostic NMS, which is faster.
use_regular_nms: bool = False
nms_score_threshold: float = 0.1
@dataclasses.dataclass
class RetinaNet(hyperparams.Config):
......@@ -174,6 +167,10 @@ class RetinaNetTask(cfg.TaskConfig):
# If set, the Waymo Open Dataset evaluator would be used.
use_wod_metrics: bool = False
# If set, freezes the backbone during training.
# TODO(crisnv) Add paper link when available.
freeze_backbone: bool = False
@exp_factory.register_config_factory('retinanet')
def retinanet() -> cfg.ExperimentConfig:
......
......@@ -68,6 +68,11 @@ flags.DEFINE_boolean(
'default: False.')
flags.DEFINE_string('output_file_prefix', '/tmp/train', 'Path to output file')
flags.DEFINE_integer('num_shards', 32, 'Number of shards for output file.')
_NUM_PROCESSES = flags.DEFINE_integer(
'num_processes', None,
('Number of parallel processes to use. '
'If set to 0, disables multi-processing.'))
FLAGS = flags.FLAGS
......@@ -518,7 +523,8 @@ def _create_tf_record_from_coco_annotations(images_info_file,
include_masks=include_masks)
num_skipped = tfrecord_lib.write_tf_record_dataset(
output_path, coco_annotations_iter, create_tf_example, num_shards)
output_path, coco_annotations_iter, create_tf_example, num_shards,
multiple_processes=_NUM_PROCESSES.value)
logging.info('Finished writing, skipped %d annotations.', num_skipped)
......
......@@ -26,6 +26,9 @@ import tensorflow as tf
import multiprocessing as mp
LOG_EVERY = 100
def convert_to_feature(value, value_type=None):
"""Converts the given python object to a tf.train.Feature.
......@@ -114,7 +117,7 @@ def encode_mask_as_png(mask):
def write_tf_record_dataset(output_path, annotation_iterator,
process_func, num_shards,
use_multiprocessing=True, unpack_arguments=True):
multiple_processes=None, unpack_arguments=True):
"""Iterates over annotations, processes them and writes into TFRecords.
Args:
......@@ -125,7 +128,10 @@ def write_tf_record_dataset(output_path, annotation_iterator,
annotation_iterator as arguments and returns a tuple of (tf.train.Example,
int). The integer indicates the number of annotations that were skipped.
num_shards: int, the number of shards to write for the dataset.
use_multiprocessing:
multiple_processes: integer, the number of multiple parallel processes to
use. If None, uses multi-processing with number of processes equal to
`os.cpu_count()`, which is Python's default behavior. If set to 0,
multi-processing is disabled.
Whether or not to use multiple processes to write TF Records.
unpack_arguments:
Whether to unpack the tuples from annotation_iterator as individual
......@@ -143,8 +149,9 @@ def write_tf_record_dataset(output_path, annotation_iterator,
total_num_annotations_skipped = 0
if use_multiprocessing:
pool = mp.Pool()
if multiple_processes is None or multiple_processes > 0:
pool = mp.Pool(
processes=multiple_processes)
if unpack_arguments:
tf_example_iterator = pool.starmap(process_func, annotation_iterator)
else:
......@@ -157,13 +164,13 @@ def write_tf_record_dataset(output_path, annotation_iterator,
for idx, (tf_example, num_annotations_skipped) in enumerate(
tf_example_iterator):
if idx % 100 == 0:
if idx % LOG_EVERY == 0:
logging.info('On image %d', idx)
total_num_annotations_skipped += num_annotations_skipped
writers[idx % num_shards].write(tf_example.SerializeToString())
if use_multiprocessing:
if multiple_processes is None or multiple_processes > 0:
pool.close()
pool.join()
......
......@@ -47,7 +47,7 @@ class TfrecordLibTest(parameterized.TestCase):
path = os.path.join(FLAGS.test_tmpdir, 'train')
tfrecord_lib.write_tf_record_dataset(
path, data, process_sample, 3, use_multiprocessing=False)
path, data, process_sample, 3, multiple_processes=0)
tfrecord_files = tf.io.gfile.glob(path + '*')
self.assertLen(tfrecord_files, 3)
......
......@@ -277,12 +277,12 @@ class PanopticQuality:
np.sum(in_category_set.astype(np.int32)),
})
else:
results[category_set_name] = {
results.update({
f'{category_set_name}_pq': 0.,
f'{category_set_name}_sq': 0.,
f'{category_set_name}_rq': 0.,
f'{category_set_name}_num_categories': 0
}
})
return results
......
......@@ -33,14 +33,13 @@ class SegmentationLoss:
self._use_groundtruth_dimension = use_groundtruth_dimension
self._label_smoothing = label_smoothing
def __call__(self, logits, labels):
def __call__(self, logits, labels, **kwargs):
_, height, width, num_classes = logits.get_shape().as_list()
if self._use_groundtruth_dimension:
# TODO(arashwan): Test using align corners to match deeplab alignment.
logits = tf.image.resize(
logits, tf.shape(labels)[1:3],
method=tf.image.ResizeMethod.BILINEAR)
logits, tf.shape(labels)[1:3], method=tf.image.ResizeMethod.BILINEAR)
else:
labels = tf.image.resize(
labels, (height, width),
......@@ -54,11 +53,9 @@ class SegmentationLoss:
labels = tf.squeeze(tf.cast(labels, tf.int32), axis=3)
valid_mask = tf.squeeze(tf.cast(valid_mask, tf.float32), axis=3)
onehot_labels = tf.one_hot(labels, num_classes)
onehot_labels = onehot_labels * (
1 - self._label_smoothing) + self._label_smoothing / num_classes
cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(
labels=onehot_labels, logits=logits)
labels=self.get_labels_with_prob(labels, logits, **kwargs),
logits=logits)
if not self._class_weights:
class_weights = [1] * num_classes
......@@ -90,6 +87,26 @@ class SegmentationLoss:
return loss
def get_labels_with_prob(self, labels, logits, **unused_kwargs):
"""Get a tensor representing the probability of each class for each pixel.
This method can be overridden in subclasses for customizing loss function.
Args:
labels: A float tensor in shape (batch_size, height, width), which is the
label map of the ground truth.
logits: A float tensor in shape (batch_size, height, width, num_classes)
which is the output of the network.
**unused_kwargs: Unused keyword arguments.
Returns:
A float tensor in shape (batch_size, height, width, num_classes).
"""
num_classes = logits.get_shape().as_list()[-1]
onehot_labels = tf.one_hot(labels, num_classes)
return onehot_labels * (
1 - self._label_smoothing) + self._label_smoothing / num_classes
def get_actual_mask_scores(logits, labels, ignore_label):
"""Gets actual mask scores."""
......@@ -97,8 +114,7 @@ def get_actual_mask_scores(logits, labels, ignore_label):
batch_size = tf.shape(logits)[0]
logits = tf.stop_gradient(logits)
labels = tf.image.resize(
labels, (height, width),
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
labels, (height, width), method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
predicted_labels = tf.argmax(logits, -1, output_type=tf.int32)
flat_predictions = tf.reshape(predicted_labels, [batch_size, -1])
flat_labels = tf.cast(tf.reshape(labels, [batch_size, -1]), tf.int32)
......
......@@ -243,6 +243,7 @@ class SpineNetMobile(tf.keras.Model):
in_filters=in_filters,
out_filters=out_filters,
strides=strides,
se_gating_activation='hard_sigmoid',
se_ratio=se_ratio,
expand_ratio=expand_ratio,
stochastic_depth_drop_rate=stochastic_depth_drop_rate,
......@@ -364,15 +365,21 @@ class SpineNetMobile(tf.keras.Model):
parent_weights = [
tf.nn.relu(tf.cast(tf.Variable(1.0, name='block{}_fusion{}'.format(
i, j)), dtype=dtype)) for j in range(len(parents))]
weights_sum = layers.Add()(parent_weights)
weights_sum = parent_weights[0]
for adder in parent_weights[1:]:
weights_sum = layers.Add()([weights_sum, adder])
parents = [
parents[i] * parent_weights[i] / (weights_sum + 0.0001)
for i in range(len(parents))
]
# Fuse all parent nodes then build a new block.
x = parents[0]
for adder in parents[1:]:
x = layers.Add()([x, adder])
x = tf_utils.get_activation(
self._activation, use_keras_layer=True)(layers.Add()(parents))
self._activation, use_keras_layer=True)(x)
x = self._block_group(
inputs=x,
in_filters=target_num_filters,
......
......@@ -233,8 +233,9 @@ class SegmentationHead(tf.keras.layers.Layer):
prediction layer.
upsample_factor: An `int` number to specify the upsampling factor to
generate finer mask. Default 1 means no upsampling is applied.
feature_fusion: One of `deeplabv3plus`, `pyramid_fusion`,
`panoptic_fpn_fusion`, or None. If `deeplabv3plus`, features from
feature_fusion: One of the constants in nn_layers.FeatureFusion, namely
`deeplabv3plus`, `pyramid_fusion`, `panoptic_fpn_fusion`,
`deeplabv3plus_sum_to_merge`, or None. If `deeplabv3plus`, features from
decoder_features[level] will be fused with low level feature maps from
backbone. If `pyramid_fusion`, multiscale features will be resized and
fused at the target level.
......@@ -245,10 +246,12 @@ class SegmentationHead(tf.keras.layers.Layer):
feature fusion. It is only used when feature_fusion is set to
`panoptic_fpn_fusion`.
low_level: An `int` of backbone level to be used for feature fusion. It is
used when feature_fusion is set to `deeplabv3plus`.
used when feature_fusion is set to `deeplabv3plus` or
`deeplabv3plus_sum_to_merge`.
low_level_num_filters: An `int` of reduced number of filters for the low
level features before fusing it with higher level features. It is only
used when feature_fusion is set to `deeplabv3plus`.
used when feature_fusion is set to `deeplabv3plus` or
`deeplabv3plus_sum_to_merge`.
num_decoder_filters: An `int` of number of filters in the decoder outputs.
It is only used when feature_fusion is set to `panoptic_fpn_fusion`.
activation: A `str` that indicates which activation is used, e.g. 'relu',
......@@ -312,7 +315,8 @@ class SegmentationHead(tf.keras.layers.Layer):
'epsilon': self._config_dict['norm_epsilon'],
}
if self._config_dict['feature_fusion'] == 'deeplabv3plus':
if self._config_dict['feature_fusion'] in {'deeplabv3plus',
'deeplabv3plus_sum_to_merge'}:
# Deeplabv3+ feature fusion layers.
self._dlv3p_conv = conv_op(
kernel_size=1,
......@@ -398,7 +402,8 @@ class SegmentationHead(tf.keras.layers.Layer):
backbone_output = inputs[0]
decoder_output = inputs[1]
if self._config_dict['feature_fusion'] == 'deeplabv3plus':
if self._config_dict['feature_fusion'] in {'deeplabv3plus',
'deeplabv3plus_sum_to_merge'}:
# deeplabv3+ feature fusion
x = decoder_output[str(self._config_dict['level'])] if isinstance(
decoder_output, dict) else decoder_output
......@@ -410,7 +415,10 @@ class SegmentationHead(tf.keras.layers.Layer):
x = tf.image.resize(
x, tf.shape(y)[1:3], method=tf.image.ResizeMethod.BILINEAR)
x = tf.cast(x, dtype=y.dtype)
x = tf.concat([x, y], axis=self._bn_axis)
if self._config_dict['feature_fusion'] == 'deeplabv3plus':
x = tf.concat([x, y], axis=self._bn_axis)
else:
x = tf.keras.layers.Add()([x, y])
elif self._config_dict['feature_fusion'] == 'pyramid_fusion':
if not isinstance(decoder_output, dict):
raise ValueError('Only support dictionary decoder_output.')
......
......@@ -30,7 +30,9 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
(2, 'panoptic_fpn_fusion', 2, 5),
(2, 'panoptic_fpn_fusion', 2, 6),
(3, 'panoptic_fpn_fusion', 3, 5),
(3, 'panoptic_fpn_fusion', 3, 6))
(3, 'panoptic_fpn_fusion', 3, 6),
(3, 'deeplabv3plus', 3, 6),
(3, 'deeplabv3plus_sum_to_merge', 3, 6))
def test_forward(self, level, feature_fusion,
decoder_min_level, decoder_max_level):
backbone_features = {
......@@ -52,6 +54,8 @@ class SegmentationHeadTest(parameterized.TestCase, tf.test.TestCase):
head = segmentation_heads.SegmentationHead(
num_classes=10,
level=level,
low_level=decoder_min_level,
low_level_num_filters=64,
feature_fusion=feature_fusion,
decoder_min_level=decoder_min_level,
decoder_max_level=decoder_max_level,
......
......@@ -15,6 +15,7 @@
"""Anchor box and labeler definition."""
import collections
from typing import Dict, Optional, Tuple
# Import libraries
......@@ -65,7 +66,7 @@ class Anchor(object):
self.image_size = image_size
self.boxes = self._generate_boxes()
def _generate_boxes(self):
def _generate_boxes(self) -> tf.Tensor:
"""Generates multiscale anchor boxes.
Returns:
......@@ -100,7 +101,7 @@ class Anchor(object):
boxes_all.append(boxes_l)
return tf.concat(boxes_all, axis=0)
def unpack_labels(self, labels):
def unpack_labels(self, labels: tf.Tensor) -> Dict[str, tf.Tensor]:
"""Unpacks an array of labels into multiscales labels."""
unpacked_labels = collections.OrderedDict()
count = 0
......@@ -146,17 +147,24 @@ class AnchorLabeler(object):
force_match_for_each_col=True)
self.box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
def label_anchors(self,
anchor_boxes,
gt_boxes,
gt_labels,
gt_attributes=None,
gt_weights=None):
def label_anchors(
self,
anchor_boxes: Dict[str, tf.Tensor],
gt_boxes: tf.Tensor,
gt_labels: tf.Tensor,
gt_attributes: Optional[Dict[str, tf.Tensor]] = None,
gt_weights: Optional[tf.Tensor] = None
) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor], Dict[str, Dict[
str, tf.Tensor]], tf.Tensor, tf.Tensor]:
"""Labels anchors with ground truth inputs.
Args:
anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
anchor_boxes: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of the feature pyramid at l-th
level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
four corners.
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
......@@ -166,30 +174,29 @@ class AnchorLabeler(object):
representing groundtruth attributes.
gt_weights: If not None, a float tensor with shape [N] representing
groundtruth weights.
Returns:
cls_targets_dict: ordered dictionary with keys
cls_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys
box_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of bounding box regression output at
l-th level.
attribute_targets_dict: a dict with (name, attribute_targets) pairs. Each
attribute_targets_dict: A dict with (name, attribute_targets) pairs. Each
`attribute_targets` represents an ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * attribute_size].
The height_l and width_l represent the dimension of attribute prediction
output at l-th level.
cls_weights: A flattened Tensor with shape [batch_size, num_anchors], that
serves as masking / sample weight for classification loss. Its value
is 1.0 for positive and negative matched anchors, and 0.0 for ignored
anchors.
box_weights: A flattened Tensor with shape [batch_size, num_anchors], that
serves as masking / sample weight for regression loss. Its value is
1.0 for positive matched anchors, and 0.0 for negative and ignored
anchors.
cls_weights: A flattened Tensor with shape [num_anchors], that serves as
masking / sample weight for classification loss. Its value is 1.0 for
positive and negative matched anchors, and 0.0 for ignored anchors.
box_weights: A flattened Tensor with shape [num_anchors], that serves as
masking / sample weight for regression loss. Its value is 1.0 for
positive matched anchors, and 0.0 for negative and ignored anchors.
"""
flattened_anchor_boxes = []
for anchors in anchor_boxes.values():
......@@ -286,25 +293,33 @@ class RpnAnchorLabeler(AnchorLabeler):
return (ignore_labels + positive_labels + negative_labels,
positive_labels, negative_labels)
def label_anchors(self, anchor_boxes, gt_boxes, gt_labels):
def label_anchors(
self, anchor_boxes: Dict[str, tf.Tensor], gt_boxes: tf.Tensor,
gt_labels: tf.Tensor
) -> Tuple[Dict[str, tf.Tensor], Dict[str, tf.Tensor]]:
"""Labels anchors with ground truth inputs.
Args:
anchor_boxes: A float tensor with shape [N, 4] representing anchor boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
anchor_boxes: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of the feature pyramid at l-th
level. For each anchor box, the tensor stores [y0, x0, y1, x1] for the
four corners.
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: A integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
score_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l
represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys
shape [height_l, width_l, num_anchors_per_location]. The height_l and
width_l represent the dimension of class logits at l-th level.
box_targets_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
shape [height_l, width_l, num_anchors_per_location * 4]. The height_l
and width_l represent the dimension of bounding box regression output at
l-th level.
"""
flattened_anchor_boxes = []
......@@ -362,8 +377,27 @@ def build_anchor_generator(min_level, max_level, num_scales, aspect_ratios,
return anchor_gen
def unpack_targets(targets, anchor_boxes_dict):
"""Unpacks an array of labels into multiscales labels."""
def unpack_targets(
targets: tf.Tensor,
anchor_boxes_dict: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]:
"""Unpacks an array of labels into multiscales labels.
Args:
targets: A tensor with shape [num_anchors, M] representing the packed
targets with M values stored for each anchor.
anchor_boxes_dict: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with shape
[height_l, width_l, num_anchors_per_location * 4]. The height_l and
width_l represent the dimension of the feature pyramid at l-th level. For
each anchor box, the tensor stores [y0, x0, y1, x1] for the four corners.
Returns:
unpacked_targets: An ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with shape
[height_l, width_l, num_anchors_per_location * M]. The height_l and
width_l represent the dimension of the feature pyramid at l-th level. M is
the number of values stored for each anchor.
"""
unpacked_targets = collections.OrderedDict()
count = 0
for level, anchor_boxes in anchor_boxes_dict.items():
......
......@@ -32,7 +32,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_image_size: List[int],
input_type: str = 'image_tensor',
num_channels: int = 3,
model: Optional[tf.keras.Model] = None):
model: Optional[tf.keras.Model] = None,
input_name: Optional[str] = None):
"""Initializes a module for export.
Args:
......@@ -43,12 +44,14 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_type: The input signature type.
num_channels: The number of the image channels.
model: A tf.keras.Model instance to be exported.
input_name: A customized input tensor name.
"""
self.params = params
self._batch_size = batch_size
self._input_image_size = input_image_size
self._num_channels = num_channels
self._input_type = input_type
self._input_name = input_name
if model is None:
model = self._build_model() # pylint: disable=assignment-from-none
super().__init__(params=params, model=model)
......@@ -163,19 +166,20 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_signature = tf.TensorSpec(
shape=[self._batch_size] + [None] * len(self._input_image_size) +
[self._num_channels],
dtype=tf.uint8)
dtype=tf.uint8,
name=self._input_name)
signatures[
def_name] = self.inference_from_image_tensors.get_concrete_function(
input_signature)
elif key == 'image_bytes':
input_signature = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string)
shape=[self._batch_size], dtype=tf.string, name=self._input_name)
signatures[
def_name] = self.inference_from_image_bytes.get_concrete_function(
input_signature)
elif key == 'serve_examples' or key == 'tf_example':
input_signature = tf.TensorSpec(
shape=[self._batch_size], dtype=tf.string)
shape=[self._batch_size], dtype=tf.string, name=self._input_name)
signatures[
def_name] = self.inference_from_tf_example.get_concrete_function(
input_signature)
......@@ -183,7 +187,8 @@ class ExportModule(export_base.ExportModule, metaclass=abc.ABCMeta):
input_signature = tf.TensorSpec(
shape=[self._batch_size] + self._input_image_size +
[self._num_channels],
dtype=tf.float32)
dtype=tf.float32,
name=self._input_name)
signatures[def_name] = self.inference_for_tflite.get_concrete_function(
input_signature)
else:
......
......@@ -45,11 +45,12 @@ from official.vision.serving import export_saved_model_lib
FLAGS = flags.FLAGS
flags.DEFINE_string('experiment', None,
'experiment type, e.g. retinanet_resnetfpn_coco')
flags.DEFINE_string('export_dir', None, 'The export directory.')
flags.DEFINE_string('checkpoint_path', None, 'Checkpoint path.')
flags.DEFINE_multi_string(
_EXPERIMENT = flags.DEFINE_string(
'experiment', None, 'experiment type, e.g. retinanet_resnetfpn_coco')
_EXPORT_DIR = flags.DEFINE_string('export_dir', None, 'The export directory.')
_CHECKPOINT_PATH = flags.DEFINE_string('checkpoint_path', None,
'Checkpoint path.')
_CONFIG_FILE = flags.DEFINE_multi_string(
'config_file',
default=None,
help='YAML/JSON files which specifies overrides. The override order '
......@@ -58,49 +59,57 @@ flags.DEFINE_multi_string(
'specified in Python. If the same parameter is specified in both '
'`--config_file` and `--params_override`, `config_file` will be used '
'first, followed by params_override.')
flags.DEFINE_string(
_PARAMS_OVERRIDE = flags.DEFINE_string(
'params_override', '',
'The JSON/YAML file or string which specifies the parameter to be overriden'
' on top of `config_file` template.')
flags.DEFINE_integer('batch_size', None, 'The batch size.')
flags.DEFINE_string(
_BATCH_SIZSE = flags.DEFINE_integer('batch_size', None, 'The batch size.')
_IMAGE_TYPE = flags.DEFINE_string(
'input_type', 'image_tensor',
'One of `image_tensor`, `image_bytes`, `tf_example` and `tflite`.')
flags.DEFINE_string(
_INPUT_IMAGE_SIZE = flags.DEFINE_string(
'input_image_size', '224,224',
'The comma-separated string of two integers representing the height,width '
'of the input to the model.')
flags.DEFINE_string('export_checkpoint_subdir', 'checkpoint',
'The subdirectory for checkpoints.')
flags.DEFINE_string('export_saved_model_subdir', 'saved_model',
'The subdirectory for saved model.')
flags.DEFINE_bool('log_model_flops_and_params', False,
'If true, logs model flops and parameters.')
_EXPORT_CHECKPOINT_SUBDIR = flags.DEFINE_string(
'export_checkpoint_subdir', 'checkpoint',
'The subdirectory for checkpoints.')
_EXPORT_SAVED_MODEL_SUBDIR = flags.DEFINE_string(
'export_saved_model_subdir', 'saved_model',
'The subdirectory for saved model.')
_LOG_MODEL_FLOPS_AND_PARAMS = flags.DEFINE_bool(
'log_model_flops_and_params', False,
'If true, logs model flops and parameters.')
_INPUT_NAME = flags.DEFINE_string(
'input_name', None,
'Input tensor name in signature def. Default at None which'
'produces input tensor name `inputs`.')
def main(_):
params = exp_factory.get_exp_config(FLAGS.experiment)
for config_file in FLAGS.config_file or []:
params = exp_factory.get_exp_config(_EXPERIMENT.value)
for config_file in _CONFIG_FILE.value or []:
params = hyperparams.override_params_dict(
params, config_file, is_strict=True)
if FLAGS.params_override:
if _PARAMS_OVERRIDE.value:
params = hyperparams.override_params_dict(
params, FLAGS.params_override, is_strict=True)
params, _PARAMS_OVERRIDE.value, is_strict=True)
params.validate()
params.lock()
export_saved_model_lib.export_inference_graph(
input_type=FLAGS.input_type,
batch_size=FLAGS.batch_size,
input_image_size=[int(x) for x in FLAGS.input_image_size.split(',')],
input_type=_IMAGE_TYPE.value,
batch_size=_BATCH_SIZSE.value,
input_image_size=[int(x) for x in _INPUT_IMAGE_SIZE.value.split(',')],
params=params,
checkpoint_path=FLAGS.checkpoint_path,
export_dir=FLAGS.export_dir,
export_checkpoint_subdir=FLAGS.export_checkpoint_subdir,
export_saved_model_subdir=FLAGS.export_saved_model_subdir,
log_model_flops_and_params=FLAGS.log_model_flops_and_params)
checkpoint_path=_CHECKPOINT_PATH.value,
export_dir=_EXPORT_DIR.value,
export_checkpoint_subdir=_EXPORT_CHECKPOINT_SUBDIR.value,
export_saved_model_subdir=_EXPORT_SAVED_MODEL_SUBDIR.value,
log_model_flops_and_params=_LOG_MODEL_FLOPS_AND_PARAMS.value,
input_name=_INPUT_NAME.value)
if __name__ == '__main__':
......
......@@ -43,7 +43,8 @@ def export_inference_graph(
export_saved_model_subdir: Optional[str] = None,
save_options: Optional[tf.saved_model.SaveOptions] = None,
log_model_flops_and_params: bool = False,
checkpoint: Optional[tf.train.Checkpoint] = None):
checkpoint: Optional[tf.train.Checkpoint] = None,
input_name: Optional[str] = None):
"""Exports inference graph for the model specified in the exp config.
Saved model is stored at export_dir/saved_model, checkpoint is saved
......@@ -69,6 +70,8 @@ def export_inference_graph(
and model parameters to model_params.txt.
checkpoint: An optional tf.train.Checkpoint. If provided, the export module
will use it to read the weights.
input_name: The input tensor name, default at `None` which produces input
tensor name `inputs`.
"""
if export_checkpoint_subdir:
......@@ -92,7 +95,8 @@ def export_inference_graph(
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels)
num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task, configs.retinanet.RetinaNetTask) or isinstance(
params.task, configs.maskrcnn.MaskRCNNTask):
export_module = detection.DetectionModule(
......@@ -100,7 +104,8 @@ def export_inference_graph(
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels)
num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task,
configs.semantic_segmentation.SemanticSegmentationTask):
export_module = semantic_segmentation.SegmentationModule(
......@@ -108,7 +113,8 @@ def export_inference_graph(
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels)
num_channels=num_channels,
input_name=input_name)
elif isinstance(params.task,
configs.video_classification.VideoClassificationTask):
export_module = video_classification.VideoClassificationModule(
......@@ -116,7 +122,8 @@ def export_inference_graph(
batch_size=batch_size,
input_image_size=input_image_size,
input_type=input_type,
num_channels=num_channels)
num_channels=num_channels,
input_name=input_name)
else:
raise ValueError('Export module not implemented for {} task.'.format(
type(params.task)))
......
......@@ -59,6 +59,10 @@ class RetinaNetTask(base_task.Task):
input_specs=input_specs,
model_config=self.task_config.model,
l2_regularizer=l2_regularizer)
if self.task_config.freeze_backbone:
model.backbone.trainable = False
return model
def initialize(self, model: tf.keras.Model):
......
......@@ -166,7 +166,7 @@ class SemanticSegmentationTask(base_task.Task):
**kwargs: other args.
"""
for metric in metrics:
if 'mask_scores_mse' is metric.name:
if 'mask_scores_mse' == metric.name:
actual_mask_scores = segmentation_losses.get_actual_mask_scores(
model_outputs['logits'], labels['masks'],
self.task_config.losses.ignore_label)
......
......@@ -77,8 +77,8 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
tf.zeros(input_length, tf.int32))
num_sampled_pos = tf.reduce_sum(
input_tensor=tf.cast(valid_positive_index, tf.int32))
max_num_positive_samples = tf.constant(
int(sample_size * self._positive_fraction), tf.int32)
max_num_positive_samples = tf.cast(
tf.cast(sample_size, tf.float32) * self._positive_fraction, tf.int32)
num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
num_negative_samples = tf.constant(sample_size,
tf.int32) - num_positive_samples
......@@ -219,7 +219,7 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size. If None, keeps all positive samples and
randomly selects negative samples so that the positive sample fraction
matches self._positive_fraction. It cannot be None is is_static is True.
matches self._positive_fraction. It cannot be None if is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples.
scope: name scope.
......@@ -259,7 +259,9 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
max_num_pos = tf.reduce_sum(
input_tensor=tf.cast(positive_idx, dtype=tf.int32))
else:
max_num_pos = int(self._positive_fraction * batch_size)
max_num_pos = tf.cast(
self._positive_fraction * tf.cast(batch_size, tf.float32),
tf.int32)
sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
num_sampled_pos = tf.reduce_sum(
input_tensor=tf.cast(sampled_pos_idx, tf.int32))
......
......@@ -54,7 +54,7 @@ def area(boxlist, scope=None):
Returns:
a tensor with shape [N] representing box areas.
"""
with tf.name_scope(scope, 'Area'):
with tf.name_scope(scope or 'Area'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
......@@ -71,7 +71,7 @@ def height_width(boxlist, scope=None):
Height: A tensor with shape [N] representing box heights.
Width: A tensor with shape [N] representing box widths.
"""
with tf.name_scope(scope, 'HeightWidth'):
with tf.name_scope(scope or 'HeightWidth'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
......@@ -89,7 +89,7 @@ def scale(boxlist, y_scale, x_scale, scope=None):
Returns:
boxlist: BoxList holding N boxes
"""
with tf.name_scope(scope, 'Scale'):
with tf.name_scope(scope or 'Scale'):
y_scale = tf.cast(y_scale, tf.float32)
x_scale = tf.cast(x_scale, tf.float32)
y_min, x_min, y_max, x_max = tf.split(
......@@ -121,7 +121,7 @@ def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
Returns:
a BoxList holding M_out boxes where M_out <= M_in
"""
with tf.name_scope(scope, 'ClipToWindow'):
with tf.name_scope(scope or 'ClipToWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
......@@ -160,7 +160,7 @@ def prune_outside_window(boxlist, window, scope=None):
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
with tf.name_scope(scope, 'PruneOutsideWindow'):
with tf.name_scope(scope or 'PruneOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
......@@ -194,7 +194,7 @@ def prune_completely_outside_window(boxlist, window, scope=None):
valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
in the input tensor.
"""
with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
with tf.name_scope(scope or 'PruneCompleteleyOutsideWindow'):
y_min, x_min, y_max, x_max = tf.split(
value=boxlist.get(), num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
......@@ -220,7 +220,7 @@ def intersection(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N, M] representing pairwise intersections
"""
with tf.name_scope(scope, 'Intersection'):
with tf.name_scope(scope or 'Intersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
......@@ -245,7 +245,7 @@ def matched_intersection(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N] representing pairwise intersections
"""
with tf.name_scope(scope, 'MatchedIntersection'):
with tf.name_scope(scope or 'MatchedIntersection'):
y_min1, x_min1, y_max1, x_max1 = tf.split(
value=boxlist1.get(), num_or_size_splits=4, axis=1)
y_min2, x_min2, y_max2, x_max2 = tf.split(
......@@ -270,7 +270,7 @@ def iou(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N, M] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'IOU'):
with tf.name_scope(scope or 'IOU'):
intersections = intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
......@@ -292,7 +292,7 @@ def matched_iou(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N] representing pairwise iou scores.
"""
with tf.name_scope(scope, 'MatchedIOU'):
with tf.name_scope(scope or 'MatchedIOU'):
intersections = matched_intersection(boxlist1, boxlist2)
areas1 = area(boxlist1)
areas2 = area(boxlist2)
......@@ -317,7 +317,7 @@ def ioa(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N, M] representing pairwise ioa scores.
"""
with tf.name_scope(scope, 'IOA'):
with tf.name_scope(scope or 'IOA'):
intersections = intersection(boxlist1, boxlist2)
areas = tf.expand_dims(area(boxlist2), 0)
return tf.truediv(intersections, areas)
......@@ -344,7 +344,7 @@ def prune_non_overlapping_boxes(boxlist1,
keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
first input BoxList `boxlist1`.
"""
with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
with tf.name_scope(scope or 'PruneNonOverlappingBoxes'):
ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
......@@ -364,7 +364,7 @@ def prune_small_boxes(boxlist, min_side, scope=None):
Returns:
A pruned boxlist.
"""
with tf.name_scope(scope, 'PruneSmallBoxes'):
with tf.name_scope(scope or 'PruneSmallBoxes'):
height, width = height_width(boxlist)
is_valid = tf.logical_and(
tf.greater_equal(width, min_side), tf.greater_equal(height, min_side))
......@@ -391,7 +391,7 @@ def change_coordinate_frame(boxlist, window, scope=None):
Returns:
Returns a BoxList object with N boxes.
"""
with tf.name_scope(scope, 'ChangeCoordinateFrame'):
with tf.name_scope(scope or 'ChangeCoordinateFrame'):
win_height = window[2] - window[0]
win_width = window[3] - window[1]
boxlist_new = scale(
......@@ -423,7 +423,7 @@ def sq_dist(boxlist1, boxlist2, scope=None):
Returns:
a tensor with shape [N, M] representing pairwise distances
"""
with tf.name_scope(scope, 'SqDist'):
with tf.name_scope(scope or 'SqDist'):
sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
innerprod = tf.matmul(
......@@ -463,7 +463,7 @@ def boolean_mask(boxlist,
Raises:
ValueError: if `indicator` is not a rank-1 boolean tensor.
"""
with tf.name_scope(scope, 'BooleanMask'):
with tf.name_scope(scope or 'BooleanMask'):
if indicator.shape.ndims != 1:
raise ValueError('indicator should have rank 1')
if indicator.dtype != tf.bool:
......@@ -521,7 +521,7 @@ def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
ValueError: if specified field is not contained in boxlist or if the
indices are not of type int32
"""
with tf.name_scope(scope, 'Gather'):
with tf.name_scope(scope or 'Gather'):
if len(indices.shape.as_list()) != 1:
raise ValueError('indices should have rank 1')
if indices.dtype != tf.int32 and indices.dtype != tf.int64:
......@@ -562,7 +562,7 @@ def concatenate(boxlists, fields=None, scope=None):
contains non BoxList objects), or if requested fields are not contained in
all boxlists
"""
with tf.name_scope(scope, 'Concatenate'):
with tf.name_scope(scope or 'Concatenate'):
if not isinstance(boxlists, list):
raise ValueError('boxlists should be a list')
if not boxlists:
......@@ -612,7 +612,7 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
ValueError: if specified field does not exist
ValueError: if the order is not either descend or ascend
"""
with tf.name_scope(scope, 'SortByField'):
with tf.name_scope(scope or 'SortByField'):
if order != SortOrder.descend and order != SortOrder.ascend:
raise ValueError('Invalid sort order')
......@@ -653,7 +653,7 @@ def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
Returns:
image_and_boxes: an image tensor with shape [height, width, 3]
"""
with tf.name_scope(scope, 'VisualizeBoxesInImage'):
with tf.name_scope(scope or 'VisualizeBoxesInImage'):
if not normalized:
height, width, _ = tf.unstack(tf.shape(image))
boxlist = scale(boxlist, 1.0 / tf.cast(height, tf.float32),
......@@ -679,7 +679,7 @@ def filter_field_value_equals(boxlist, field, value, scope=None):
ValueError: if boxlist not a BoxList object or if it does not have
the specified field.
"""
with tf.name_scope(scope, 'FilterFieldValueEquals'):
with tf.name_scope(scope or 'FilterFieldValueEquals'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field(field):
......@@ -710,7 +710,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
ValueError: if boxlist not a BoxList object or if it does not
have a scores field
"""
with tf.name_scope(scope, 'FilterGreaterThan'):
with tf.name_scope(scope or 'FilterGreaterThan'):
if not isinstance(boxlist, box_list.BoxList):
raise ValueError('boxlist must be a BoxList')
if not boxlist.has_field('scores'):
......@@ -746,7 +746,7 @@ def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
Raises:
ValueError: if thresh is not in [0, 1]
"""
with tf.name_scope(scope, 'NonMaxSuppression'):
with tf.name_scope(scope or 'NonMaxSuppression'):
if not 0 <= thresh <= 1.0:
raise ValueError('thresh must be between 0 and 1')
if not isinstance(boxlist, box_list.BoxList):
......@@ -802,7 +802,7 @@ def to_normalized_coordinates(boxlist,
Returns:
boxlist with normalized coordinates in [0, 1].
"""
with tf.name_scope(scope, 'ToNormalizedCoordinates'):
with tf.name_scope(scope or 'ToNormalizedCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
......@@ -842,7 +842,7 @@ def to_absolute_coordinates(boxlist,
boxlist with absolute coordinates in terms of the image size.
"""
with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
with tf.name_scope(scope or 'ToAbsoluteCoordinates'):
height = tf.cast(height, tf.float32)
width = tf.cast(width, tf.float32)
......@@ -987,10 +987,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
# match to any boxes in pool_boxes. For such boxes without any matches, we
# should return the original boxes without voting.
match_assert = tf.Assert(
tf.reduce_all(tf.greater(num_matches, 0)), [
'Each box in selected_boxes must match with at least one box '
'in pool_boxes.'
])
tf.reduce_all(tf.greater(num_matches, 0)),
'Each box in selected_boxes must match with at least one box '
'in pool_boxes.')
scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
scores_assert = tf.Assert(
......@@ -1024,7 +1023,7 @@ def get_minimal_coverage_box(boxlist, default_box=None, scope=None):
boxes in the box list. If the boxlist does not contain any boxes, the
default box is returned.
"""
with tf.name_scope(scope, 'CreateCoverageBox'):
with tf.name_scope(scope or 'CreateCoverageBox'):
num_boxes = boxlist.num_boxes()
def coverage_box(bboxes):
......@@ -1068,7 +1067,7 @@ def sample_boxes_by_jittering(boxlist,
sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
normalized coordinates.
"""
with tf.name_scope(scope, 'SampleBoxesByJittering'):
with tf.name_scope(scope or 'SampleBoxesByJittering'):
num_boxes = boxlist.num_boxes()
box_indices = tf.random_uniform([num_boxes_to_sample],
minval=0,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment