Commit 88253ce5 authored by Hongkun Yu, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 326286926
parent 52371ffe
......@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
Args:
distribution_strategy: a string specifying which distribution strategy to
use. Accepted values are "off", "one_device", "mirrored",
"parameter_server", "multi_worker_mirrored", and "tpu" -- case insensitive.
"off" means not to use Distribution Strategy; "tpu" means to use
TPUStrategy using `tpu_address`.
"parameter_server", "multi_worker_mirrored", and "tpu" -- case
insensitive. "off" means not to use Distribution Strategy; "tpu" means to
use TPUStrategy using `tpu_address`.
num_gpus: Number of GPUs to run this model.
all_reduce_alg: Optional. Specifies which algorithm to use when performing
all-reduce. For `MirroredStrategy`, valid values are "nccl" and
......@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
device topology.
num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
tpu_address: Optional. String that represents TPU to connect to. Must not
be None if `distribution_strategy` is set to `tpu`.
tpu_address: Optional. String that represents TPU to connect to. Must not be
None if `distribution_strategy` is set to `tpu`.
Returns:
tf.distribute.DistributionStrategy object.
Raises:
......@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
distribution_strategy = distribution_strategy.lower()
if distribution_strategy == "off":
if num_gpus > 1:
raise ValueError(
"When {} GPUs are specified, distribution_strategy "
"flag cannot be set to `off`.".format(num_gpus))
raise ValueError("When {} GPUs are specified, distribution_strategy "
"flag cannot be set to `off`.".format(num_gpus))
return None
if distribution_strategy == "tpu":
......@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
if distribution_strategy == "parameter_server":
return tf.distribute.experimental.ParameterServerStrategy()
raise ValueError(
"Unrecognized Distribution Strategy: %r" % distribution_strategy)
raise ValueError("Unrecognized Distribution Strategy: %r" %
distribution_strategy)
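
For reference, a minimal usage sketch of the selection logic above ("mirrored" with two GPUs returns a `tf.distribute.MirroredStrategy`; the model-building call is a hypothetical placeholder):

from official.utils.misc import distribution_utils

strategy = distribution_utils.get_distribution_strategy(
    distribution_strategy="mirrored",
    num_gpus=2,
    all_reduce_alg="nccl",
    num_packs=1)

with strategy.scope():
  model = build_model()  # hypothetical model builder; any Keras model works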
def configure_cluster(worker_hosts=None, task_index=-1):
......@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"""
tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
if tf_config:
num_workers = (len(tf_config["cluster"].get("chief", [])) +
len(tf_config["cluster"].get("worker", [])))
num_workers = (
len(tf_config["cluster"].get("chief", [])) +
len(tf_config["cluster"].get("worker", [])))
elif worker_hosts:
workers = worker_hosts.split(",")
num_workers = len(workers)
......@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"cluster": {
"worker": workers
},
"task": {"type": "worker", "index": task_index}
"task": {
"type": "worker",
"index": task_index
}
})
else:
num_workers = 1
......
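
For reference, this is the shape of the TF_CONFIG that configure_cluster writes for a hypothetical two-worker setup when `worker_hosts` is given and TF_CONFIG is not already set:

import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "worker": ["host0:2222", "host1:2222"]  # hypothetical hosts
    },
    "task": {
        "type": "worker",
        "index": 0
    }
})
# configure_cluster would then report num_workers == 2.
num_workers = len(json.loads(os.environ["TF_CONFIG"])["cluster"]["worker"])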
......@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
class GetDistributionStrategyTest(tf.test.TestCase):
"""Tests for get_distribution_strategy."""
def test_one_device_strategy_cpu(self):
ds = distribution_utils.get_distribution_strategy(num_gpus=0)
self.assertEquals(ds.num_replicas_in_sync, 1)
......@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
self.assertIn('GPU', device)
if __name__ == "__main__":
if __name__ == '__main__':
tf.test.main()
......@@ -25,7 +25,6 @@ import time
from absl import logging
import tensorflow as tf
from tensorflow.python.eager import monitoring
global_batch_size_gauge = monitoring.IntGauge(
......@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
# Record the timestamp of the first global step
if not self.timestamp_log:
self.timestamp_log.append(BatchTimestamp(self.global_steps,
self.start_time))
self.timestamp_log.append(
BatchTimestamp(self.global_steps, self.start_time))
def on_batch_end(self, batch, logs=None):
"""Records elapse time of the batch and calculates examples per second."""
......@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
if enable_xla:
tf.config.optimizer.set_jit(True)
# TODO(hongkuny): remove set_config_v2 globally.
set_config_v2 = set_session_config
def set_gpu_thread_mode_and_count(gpu_thread_mode,
datasets_num_private_threads,
def set_gpu_thread_mode_and_count(gpu_thread_mode, datasets_num_private_threads,
num_gpus, per_gpu_thread_count):
"""Set GPU thread mode and count, and adjust dataset threads count."""
cpu_count = multiprocessing.cpu_count()
......@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
per_gpu_thread_count = per_gpu_thread_count or 2
os.environ['TF_GPU_THREAD_MODE'] = gpu_thread_mode
os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count)
logging.info('TF_GPU_THREAD_COUNT: %s',
os.environ['TF_GPU_THREAD_COUNT'])
logging.info('TF_GPU_THREAD_MODE: %s',
os.environ['TF_GPU_THREAD_MODE'])
logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT'])
logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])
# Limit data preprocessing threadpool to CPU cores minus number of total GPU
# private threads and memory copy threads.
......@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
num_runtime_threads = num_gpus
if not datasets_num_private_threads:
datasets_num_private_threads = min(
cpu_count - total_gpu_thread_count - num_runtime_threads,
num_gpus * 8)
cpu_count - total_gpu_thread_count - num_runtime_threads, num_gpus * 8)
logging.info('Set datasets_num_private_threads to %s',
datasets_num_private_threads)
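
A worked example of the thread budgeting above, assuming (as in the elided code) that total_gpu_thread_count is per_gpu_thread_count * num_gpus, on a hypothetical 64-core host with 8 GPUs:

cpu_count = 64
num_gpus = 8
per_gpu_thread_count = 2
total_gpu_thread_count = per_gpu_thread_count * num_gpus  # 16
num_runtime_threads = num_gpus                            # 8
datasets_num_private_threads = min(
    cpu_count - total_gpu_thread_count - num_runtime_threads,  # 64 - 16 - 8 = 40
    num_gpus * 8)                                              # 64
# -> 40 threads remain for the tf.data preprocessing pool.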
......@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
return False
def generate_synthetic_data(
input_shape, input_value=0, input_dtype=None, label_shape=None,
label_value=0, label_dtype=None):
def generate_synthetic_data(input_shape,
input_value=0,
input_dtype=None,
label_shape=None,
label_value=0,
label_dtype=None):
"""Create a repeating dataset with constant values.
Args:
......
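
For reference, a usage sketch mirroring the unit test further down: the resulting tf.data.Dataset repeats a constant (input, label) pair, which is handy for benchmarking models without a real input pipeline:

import tensorflow as tf
from official.utils.misc import model_helpers

dataset = model_helpers.generate_synthetic_data(
    input_shape=tf.TensorShape([5]),
    input_value=123,
    input_dtype=tf.float32,
    label_shape=tf.TensorShape([]),
    label_value=456,
    label_dtype=tf.int32)
# Every element is ([123.] * 5, 456).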
......@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
def test_past_stop_threshold_not_number(self):
"""Tests for error conditions."""
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", 1)
model_helpers.past_stop_threshold('str', 1)
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", tf.constant(5))
model_helpers.past_stop_threshold('str', tf.constant(5))
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", "another")
model_helpers.past_stop_threshold('str', 'another')
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(0, None)
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(0.7, "str")
model_helpers.past_stop_threshold(0.7, 'str')
with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(tf.constant(4), None)
......@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
def test_generate_synethetic_data(self):
input_element, label_element = tf.compat.v1.data.make_one_shot_iterator(
model_helpers.generate_synthetic_data(input_shape=tf.TensorShape([5]),
input_value=123,
input_dtype=tf.float32,
label_shape=tf.TensorShape([]),
label_value=456,
label_dtype=tf.int32)).get_next()
model_helpers.generate_synthetic_data(
input_shape=tf.TensorShape([5]),
input_value=123,
input_dtype=tf.float32,
label_shape=tf.TensorShape([]),
label_value=456,
label_dtype=tf.int32)).get_next()
with self.session() as sess:
for n in range(5):
......@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
def test_generate_nested_data(self):
d = model_helpers.generate_synthetic_data(
input_shape={'a': tf.TensorShape([2]),
'b': {'c': tf.TensorShape([3]), 'd': tf.TensorShape([])}},
input_shape={
'a': tf.TensorShape([2]),
'b': {
'c': tf.TensorShape([3]),
'd': tf.TensorShape([])
}
},
input_value=1.1)
element = tf.compat.v1.data.make_one_shot_iterator(d).get_next()
......@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
self.assertAllClose(inp['b']['d'], 1.1)
if __name__ == "__main__":
if __name__ == '__main__':
tf.test.main()
......@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0')
def func_test():
pass
self.assertEqual(
registry.lookup(collection, 'functions/func_0'), func_test)
self.assertEqual(registry.lookup(collection, 'functions/func_0'), func_test)
@registry.register(collection, 'classes/cls_0')
class ClassRegistryKey:
pass
self.assertEqual(
registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey)
@registry.register(collection, ClassRegistryKey)
class ClassRegistryValue:
pass
self.assertEqual(
registry.lookup(collection, ClassRegistryKey), ClassRegistryValue)
......@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0')
def func_test0():
pass
@registry.register(collection, 'func_1')
def func_test1():
pass
@registry.register(collection, func_test1)
def func_test2():
pass
expected_collection = {
'functions': {
'func_0': func_test0,
......@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0')
def func_test0(): # pylint: disable=unused-variable
pass
with self.assertRaises(KeyError):
@registry.register(collection, 'functions/func_0/sub_func')
def func_test1(): # pylint: disable=unused-variable
pass
with self.assertRaises(LookupError):
registry.lookup(collection, 'non-exist')
......
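
The tests above pin down the registry contract: slash-separated string keys nest into dictionaries, non-string keys are used directly, duplicate registration raises KeyError, and missing lookups raise LookupError. A minimal sketch with those semantics (the real official.utils.registry implementation may differ in details):

def register(collection, name):
  """Returns a decorator that files `obj` under `name` in `collection`."""
  def decorator(obj):
    parts = name.split('/') if isinstance(name, str) else [name]
    node = collection
    for part in parts[:-1]:
      node = node.setdefault(part, {})
      if not isinstance(node, dict):
        raise KeyError('%r is already taken by a registered item.' % name)
    if parts[-1] in node:
      raise KeyError('%r is already registered.' % name)
    node[parts[-1]] = obj
    return obj
  return decorator


def lookup(collection, name):
  """Resolves `name` in `collection`, descending through '/' segments."""
  parts = name.split('/') if isinstance(name, str) else [name]
  node = collection
  for part in parts:
    if not isinstance(node, dict) or part not in node:
      raise LookupError('%r is not registered.' % name)
    node = node[part]
  return node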
......@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper code to run complete models from within python.
"""
"""Helper code to run complete models from within python."""
from __future__ import absolute_import
from __future__ import division
......@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
@flagsaver.flagsaver
def run_synthetic(main, tmp_root, extra_flags=None, synth=True, train_epochs=1,
def run_synthetic(main,
tmp_root,
extra_flags=None,
synth=True,
train_epochs=1,
epochs_between_evals=1):
"""Performs a minimal run of a model.
......
......@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import collections
import tensorflow as tf
from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
......@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
class Anchor(object):
"""Anchor class for anchor-based object detectors."""
def __init__(self,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
image_size):
def __init__(self, min_level, max_level, num_scales, aspect_ratios,
anchor_size, image_size):
"""Constructs multiscale anchors.
Args:
min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scales [2^0, 2^0.5] on each level.
num_scales: integer number representing intermediate scales added on each
level. For instance, num_scales=2 adds one additional intermediate
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of float numbers representing the aspect ratio anchors
added on each level. The number indicates the ratio of width to height.
For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
scale level.
anchor_size: float number representing the scale of size of the base
anchor to the feature stride 2^level.
image_size: a list of integer numbers or Tensors representing
[height, width] of the input image size. The image_size should be
divisible by the largest feature stride 2^max_level.
image_size: a list of integer numbers or Tensors representing [height,
width] of the input image size. The image_size should be divisible by the
largest feature stride 2^max_level.
"""
self.min_level = min_level
self.max_level = max_level
......@@ -76,11 +72,11 @@ class Anchor(object):
boxes_l = []
for scale in range(self.num_scales):
for aspect_ratio in self.aspect_ratios:
stride = 2 ** level
intermediate_scale = 2 ** (scale / float(self.num_scales))
stride = 2**level
intermediate_scale = 2**(scale / float(self.num_scales))
base_anchor_size = self.anchor_size * stride * intermediate_scale
aspect_x = aspect_ratio ** 0.5
aspect_y = aspect_ratio ** -0.5
aspect_x = aspect_ratio**0.5
aspect_y = aspect_ratio**-0.5
half_anchor_size_x = base_anchor_size * aspect_x / 2.0
half_anchor_size_y = base_anchor_size * aspect_y / 2.0
x = tf.range(stride / 2, self.image_size[1], stride)
......@@ -89,8 +85,10 @@ class Anchor(object):
xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32)
yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32)
# Tensor shape Nx4.
boxes = tf.stack([yv - half_anchor_size_y, xv - half_anchor_size_x,
yv + half_anchor_size_y, xv + half_anchor_size_x],
boxes = tf.stack([
yv - half_anchor_size_y, xv - half_anchor_size_x,
yv + half_anchor_size_y, xv + half_anchor_size_x
],
axis=1)
boxes_l.append(boxes)
# Concat anchors on the same level to tensor shape NxAx4.
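
A back-of-envelope sketch of the grid sizes this loop implies, for a hypothetical 512x512 image with levels 3..7, num_scales=3 and three aspect ratios (anchors_per_location = num_scales * len(aspect_ratios)):

image_size = (512, 512)
min_level, max_level = 3, 7
anchors_per_location = 3 * 3  # num_scales * len(aspect_ratios) = 9

total = 0
for level in range(min_level, max_level + 1):
  stride = 2**level
  feat_h, feat_w = image_size[0] // stride, image_size[1] // stride
  total += feat_h * feat_w * anchors_per_location
print(total)  # 49104 anchors across all levels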
......@@ -104,11 +102,11 @@ class Anchor(object):
unpacked_labels = collections.OrderedDict()
count = 0
for level in range(self.min_level, self.max_level + 1):
feat_size_y = tf.cast(self.image_size[0] / 2 ** level, tf.int32)
feat_size_x = tf.cast(self.image_size[1] / 2 ** level, tf.int32)
feat_size_y = tf.cast(self.image_size[0] / 2**level, tf.int32)
feat_size_x = tf.cast(self.image_size[1] / 2**level, tf.int32)
steps = feat_size_y * feat_size_x * self.anchors_per_location
unpacked_labels[level] = tf.reshape(
labels[count:count + steps], [feat_size_y, feat_size_x, -1])
unpacked_labels[level] = tf.reshape(labels[count:count + steps],
[feat_size_y, feat_size_x, -1])
count += steps
return unpacked_labels
......@@ -124,10 +122,7 @@ class Anchor(object):
class AnchorLabeler(object):
"""Labeler for dense object detector."""
def __init__(self,
anchor,
match_threshold=0.5,
unmatched_threshold=0.5):
def __init__(self, anchor, match_threshold=0.5, unmatched_threshold=0.5):
"""Constructs anchor labeler to assign labels to anchors.
Args:
......@@ -161,6 +156,7 @@ class AnchorLabeler(object):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
cls_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
......@@ -205,11 +201,14 @@ class AnchorLabeler(object):
class RpnAnchorLabeler(AnchorLabeler):
"""Labeler for Region Proposal Network."""
def __init__(self, anchor, match_threshold=0.7,
unmatched_threshold=0.3, rpn_batch_size_per_im=256,
def __init__(self,
anchor,
match_threshold=0.7,
unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5):
AnchorLabeler.__init__(self, anchor, match_threshold=0.7,
unmatched_threshold=0.3)
AnchorLabeler.__init__(
self, anchor, match_threshold=0.7, unmatched_threshold=0.3)
self._rpn_batch_size_per_im = rpn_batch_size_per_im
self._rpn_fg_fraction = rpn_fg_fraction
......@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
This function performs subsampling for foreground (fg) and background (bg)
anchors.
Args:
match_results: An integer tensor with shape [N] representing the
matching results of anchors. (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
match_results: An integer tensor with shape [N] representing the matching
results of anchors. (1) match_results[i]>=0, meaning that column i is
matched with row match_results[i]. (2) match_results[i]=-1, meaning that
column i is not matched. (3) match_results[i]=-2, meaning that column i
is ignored.
Returns:
score_targets: an integer tensor with a shape of [N].
(1) score_targets[i]=1, the anchor is a positive sample.
......@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
indicator = tf.greater(match_results, -2)
labels = tf.greater(match_results, -1)
samples = sampler.subsample(
indicator, self._rpn_batch_size_per_im, labels)
samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
positive_labels = tf.where(
tf.logical_and(samples, labels),
tf.constant(2, dtype=tf.int32, shape=match_results.shape),
......@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
tf.constant(0, dtype=tf.int32, shape=match_results.shape))
ignore_labels = tf.fill(match_results.shape, -1)
return (ignore_labels + positive_labels + negative_labels,
positive_labels, negative_labels)
return (ignore_labels + positive_labels + negative_labels, positive_labels,
negative_labels)
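
Ignoring the fg/bg subsampling step, the arithmetic above encodes match results into score targets as follows (a plain-Python sketch of the -1/0/1 encoding, with hypothetical values):

# match_results[i] >= 0 -> positive (1); == -1 -> negative (0); == -2 -> ignore (-1).
match_results = [5, -1, -2, 0]
score_targets = []
for m in match_results:
  positive = 2 if m >= 0 else 0
  negative = 1 if m == -1 else 0
  score_targets.append(-1 + positive + negative)
print(score_targets)  # [1, 0, -1, 1]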
def label_anchors(self, gt_boxes, gt_labels):
"""Labels anchors with ground truth inputs.
......@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
......
......@@ -91,7 +91,8 @@ class InputFn(object):
dataset = dataset.repeat()
dataset = dataset.interleave(
map_func=self._dataset_fn, cycle_length=32,
map_func=self._dataset_fn,
cycle_length=32,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
if self._is_training:
......
......@@ -79,9 +79,9 @@ class Parser(object):
output_size should be divisible by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added
on each level. For instance, num_scales=2 adds one additional
intermediate anchor scales [2^0, 2^0.5] on each level.
num_scales: `int` number representing intermediate scales added on each
level. For instance, num_scales=2 adds one additional intermediate
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
anchors added on each level. The number indicates the ratio of width to
height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
......@@ -94,8 +94,8 @@ class Parser(object):
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
aug_rand_hflip: `bool`, if True, augment training with random
horizontal flip.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
......@@ -109,8 +109,8 @@ class Parser(object):
max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction
or prediction with groundtruths in the outputs.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
prediction with groundtruths in the outputs.
"""
self._mode = mode
self._max_num_instances = max_num_instances
......@@ -232,8 +232,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image(
image,
self._output_size,
padded_size=input_utils.compute_padded_size(
self._output_size, 2 ** self._max_level),
padded_size=input_utils.compute_padded_size(self._output_size,
2**self._max_level),
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
image_height, image_width, _ = image.get_shape().as_list()
......@@ -241,22 +241,21 @@ class Parser(object):
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes(
boxes, image_scale, image_info[1, :], offset)
boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
# Assigns anchors.
input_anchor = anchor.Anchor(
self._min_level, self._max_level, self._num_scales,
self._aspect_ratios, self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(
input_anchor, self._match_threshold, self._unmatched_threshold)
input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._num_scales, self._aspect_ratios,
self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes,
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
......@@ -292,8 +291,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image(
image,
self._output_size,
padded_size=input_utils.compute_padded_size(
self._output_size, 2 ** self._max_level),
padded_size=input_utils.compute_padded_size(self._output_size,
2**self._max_level),
aug_scale_min=1.0,
aug_scale_max=1.0)
image_height, image_width, _ = image.get_shape().as_list()
......@@ -301,22 +300,21 @@ class Parser(object):
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes(
boxes, image_scale, image_info[1, :], offset)
boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
# Assigns anchors.
input_anchor = anchor.Anchor(
self._min_level, self._max_level, self._num_scales,
self._aspect_ratios, self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(
input_anchor, self._match_threshold, self._unmatched_threshold)
input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._num_scales, self._aspect_ratios,
self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes,
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
......@@ -324,18 +322,24 @@ class Parser(object):
# Sets up groundtruth data for evaluation.
groundtruths = {
'source_id': data['source_id'],
'num_groundtrtuhs': tf.shape(data['groundtruth_classes']),
'image_info': image_info,
'boxes': box_utils.denormalize_boxes(
data['groundtruth_boxes'], image_shape),
'classes': data['groundtruth_classes'],
'areas': data['groundtruth_area'],
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
'source_id':
data['source_id'],
'num_groundtrtuhs':
tf.shape(data['groundtruth_classes']),
'image_info':
image_info,
'boxes':
box_utils.denormalize_boxes(data['groundtruth_boxes'], image_shape),
'classes':
data['groundtruth_classes'],
'areas':
data['groundtruth_area'],
'is_crowds':
tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
groundtruths = pad_groundtruths_to_fixed_size(
groundtruths, self._max_num_instances)
groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
self._max_num_instances)
# Packs labels for model_fn outputs.
labels = {
......@@ -361,8 +365,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image(
image,
self._output_size,
padded_size=input_utils.compute_padded_size(
self._output_size, 2 ** self._max_level),
padded_size=input_utils.compute_padded_size(self._output_size,
2**self._max_level),
aug_scale_min=1.0,
aug_scale_max=1.0)
image_height, image_width, _ = image.get_shape().as_list()
......@@ -372,9 +376,9 @@ class Parser(object):
image = tf.cast(image, dtype=tf.bfloat16)
# Compute Anchor boxes.
input_anchor = anchor.Anchor(
self._min_level, self._max_level, self._num_scales,
self._aspect_ratios, self._anchor_size, (image_height, image_width))
input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._num_scales, self._aspect_ratios,
self._anchor_size, (image_height, image_width))
labels = {
'anchor_boxes': input_anchor.multilevel_boxes,
......@@ -384,8 +388,8 @@ class Parser(object):
# in labels.
if self._mode == ModeKeys.PREDICT_WITH_GT:
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_utils.denormalize_boxes(
data['groundtruth_boxes'], image_shape)
boxes = box_utils.denormalize_boxes(data['groundtruth_boxes'],
image_shape)
groundtruths = {
'source_id': data['source_id'],
'num_detections': tf.shape(data['groundtruth_classes']),
......@@ -395,8 +399,8 @@ class Parser(object):
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
groundtruths = pad_groundtruths_to_fixed_size(
groundtruths, self._max_num_instances)
groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
self._max_num_instances)
labels['groundtruths'] = groundtruths
# Computes training objective for evaluation loss.
......@@ -404,18 +408,17 @@ class Parser(object):
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes(
boxes, image_scale, image_info[1, :], offset)
boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
# Assigns anchors.
anchor_labeler = anchor.AnchorLabeler(
input_anchor, self._match_threshold, self._unmatched_threshold)
anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes,
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
labels['cls_targets'] = cls_targets
labels['box_targets'] = box_targets
labels['num_positives'] = num_positives
......
......@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
arXiv:1904.03239.
"""
import tensorflow as tf
from official.vision.detection.dataloader import anchor
......
......@@ -32,6 +32,7 @@ from __future__ import print_function
import atexit
import tempfile
import numpy as np
from absl import logging
from pycocotools import cocoeval
......@@ -197,22 +198,21 @@ class COCOEvaluator(object):
"""Update and aggregate detection results and groundtruth data.
Args:
predictions: a dictionary of numpy arrays including the fields below.
See different parsers under `../dataloader` for more details.
predictions: a dictionary of numpy arrays including the fields below. See
different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of
int of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
- detection_masks: a numpy array of float of shape
[batch_size, K, mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below.
See also different parsers under `../dataloader` for more details.
- detection_masks: a numpy array of float of shape [batch_size, K,
mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below. See
also different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
......@@ -222,12 +222,12 @@ class COCOEvaluator(object):
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the
field is absent, the area is calculated using either boxes or
masks depending on which one is available.
- masks: a numpy array of float of shape
[batch_size, K, mask_height, mask_width],
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the field
is absent, the area is calculated using either boxes or masks
depending on which one is available.
- masks: a numpy array of float of shape [batch_size, K, mask_height,
mask_width],
Raises:
ValueError: if the required prediction or groundtruth fields are not
......@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
metrics = np.hstack((coco_metrics, mcoco_eval.stats))
else:
mask_coco_metrics = mcoco_eval.category_stats
val_catg_idx = np.isin(mcoco_eval.params.catIds,
self._eval_categories)
val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories)
# Gather the valid evaluation of the eval categories.
if np.any(val_catg_idx):
mean_val_metrics = []
......
......@@ -23,6 +23,7 @@ import functools
import pprint
# pylint: disable=g-bad-import-order
# Import libraries
import tensorflow as tf
from absl import app
......
......@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
class RpnHead(tf.keras.layers.Layer):
"""Region Proposal Network head."""
def __init__(self,
min_level,
max_level,
anchors_per_location,
num_convs=2,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(
activation='relu')):
def __init__(
self,
min_level,
max_level,
anchors_per_location,
num_convs=2,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Region Proposal Network head.
Args:
......@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._min_level = min_level
self._max_level = max_level
......@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
class FastrcnnHead(tf.keras.layers.Layer):
"""Fast R-CNN box head."""
def __init__(self,
num_classes,
num_convs=0,
num_filters=256,
use_separable_conv=False,
num_fcs=2,
fc_dims=1024,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(
activation='relu')):
def __init__(
self,
num_classes,
num_convs=0,
num_filters=256,
use_separable_conv=False,
num_fcs=2,
fc_dims=1024,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN box head.
Args:
......@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
layers.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._num_classes = num_classes
......@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
strides=(1, 1),
padding='same',
dilation_rate=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op),
activation=(None
if self._use_batch_norm else self._activation_op),
name='conv_{}'.format(i)))
if self._use_batch_norm:
self._conv_bn_ops.append(self._norm_activation())
......@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
self._fc_ops.append(
tf.keras.layers.Dense(
units=self._fc_dims,
activation=(None if self._use_batch_norm else self._activation_op),
activation=(None
if self._use_batch_norm else self._activation_op),
name='fc{}'.format(i)))
if self._use_batch_norm:
self._fc_bn_ops.append(self._norm_activation(fused=False))
......@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
"""Box and class branches for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape
[batch_size, num_rois, height_l, width_l, num_filters].
roi_features: A ROI feature tensor of shape [batch_size, num_rois,
height_l, width_l, num_filters].
is_training: `boolean`, if True, the model is in training mode.
Returns:
......@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask R-CNN head."""
def __init__(self,
num_classes,
mask_target_size,
num_convs=4,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(
activation='relu')):
def __init__(
self,
num_classes,
mask_target_size,
num_convs=4,
num_filters=256,
use_separable_conv=False,
activation='relu',
use_batch_norm=True,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN head.
Args:
......@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
is used.
activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._num_classes = num_classes
self._mask_target_size = mask_target_size
......@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
strides=(1, 1),
padding='same',
dilation_rate=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op),
activation=(None
if self._use_batch_norm else self._activation_op),
name='mask-conv-l%d' % i))
self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
self._num_filters,
......@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask branch for the Mask-RCNN model.
Args:
roi_features: A ROI feature tensor of shape
[batch_size, num_rois, height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating
which class the ROI is.
roi_features: A ROI feature tensor of shape [batch_size, num_rois,
height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating which
class the ROI is.
is_training: `boolean`, if True, the model is in training mode.
Returns:
......@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
class RetinanetHead(object):
"""RetinaNet head."""
def __init__(self,
min_level,
max_level,
num_classes,
anchors_per_location,
num_convs=4,
num_filters=256,
use_separable_conv=False,
norm_activation=nn_ops.norm_activation_builder(
activation='relu')):
def __init__(
self,
min_level,
max_level,
num_classes,
anchors_per_location,
num_convs=4,
num_filters=256,
use_separable_conv=False,
norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build RetinaNet head.
Args:
......@@ -437,8 +440,8 @@ class RetinanetHead(object):
num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable
convolution.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._min_level = min_level
self._max_level = max_level
......@@ -600,12 +603,8 @@ class RetinanetHead(object):
class ShapemaskPriorHead(object):
"""ShapeMask Prior head."""
def __init__(self,
num_classes,
num_downsample_channels,
mask_crop_size,
use_category_for_mask,
shape_prior_path):
def __init__(self, num_classes, num_downsample_channels, mask_crop_size,
use_category_for_mask, shape_prior_path):
"""Initialize params to build RetinaNet head.
Args:
......@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
Args:
fpn_features: a dictionary of FPN features.
boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the tight gt boxes from dataloader/detection.
boxes: a float tensor of shape [batch_size, num_instances, 4] representing
the tight gt boxes from dataloader/detection.
outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the loose gt boxes from dataloader/detection.
classes: an int Tensor of shape [batch_size, num_instances]
of instance classes.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: training mode or not.
Returns:
......@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
shape_priors = self._get_priors()
# Get uniform priors for each outer box.
uniform_priors = tf.ones([batch_size, num_instances, self._mask_crop_size,
self._mask_crop_size])
uniform_priors = tf.ones([
batch_size, num_instances, self._mask_crop_size, self._mask_crop_size
])
uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
uniform_priors, boxes, outer_boxes, self._mask_crop_size)
......@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
tf.cast(instance_features, tf.float32), uniform_priors, classes)
instance_priors = tf.gather(shape_priors, classes)
instance_priors *= tf.expand_dims(tf.expand_dims(
tf.cast(prior_distribution, tf.float32), axis=-1), axis=-1)
instance_priors *= tf.expand_dims(
tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
axis=-1)
instance_priors = tf.reduce_sum(instance_priors, axis=2)
detection_priors = spatial_transform_ops.crop_mask_in_target_box(
instance_priors, boxes, outer_boxes, self._mask_crop_size)
......@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
# If prior path does not exist, do not use priors, i.e., priors equal to
# uniform empty 32x32 patch.
self._num_clusters = 1
priors = tf.zeros([self._mask_num_classes, self._num_clusters,
self._mask_crop_size, self._mask_crop_size])
priors = tf.zeros([
self._mask_num_classes, self._num_clusters, self._mask_crop_size,
self._mask_crop_size
])
return priors
def _classify_shape_priors(self, features, uniform_priors, classes):
......@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
category.
Args:
features: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size, num_channels].
features: A float Tensor of shape [batch_size, num_instances, mask_size,
mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors.
classes: An int Tensor of shape [batch_size, num_instances]
of detection class ids.
classes: An int Tensor of shape [batch_size, num_instances] of detection
class ids.
Returns:
prior_distribution: A float Tensor of shape
......@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
features = tf.reduce_mean(features, axis=(2, 3))
logits = tf.keras.layers.Dense(
self._mask_num_classes * self._num_clusters,
kernel_initializer=tf.random_normal_initializer(stddev=0.01))(features)
logits = tf.reshape(logits,
[batch_size, num_instances,
self._mask_num_classes, self._num_clusters])
kernel_initializer=tf.random_normal_initializer(stddev=0.01))(
features)
logits = tf.reshape(
logits,
[batch_size, num_instances, self._mask_num_classes, self._num_clusters])
if self._use_category_for_mask:
logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2)
logits = tf.squeeze(logits, axis=2)
......@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction
layer.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
"""
self._mask_num_classes = num_classes if use_category_for_mask else 1
self._use_category_for_mask = use_category_for_mask
......@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
self._class_norm_activation = []
for i in range(self._num_convs):
self._class_conv.append(tf.keras.layers.Conv2D(
self._num_downsample_channels,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01),
padding='same',
name='coarse-mask-class-%d' % i))
self._class_conv.append(
tf.keras.layers.Conv2D(
self._num_downsample_channels,
kernel_size=(3, 3),
bias_initializer=tf.zeros_initializer(),
kernel_initializer=tf.keras.initializers.RandomNormal(
stddev=0.01),
padding='same',
name='coarse-mask-class-%d' % i))
self._class_norm_activation.append(
norm_activation(name='coarse-mask-class-%d-bn' % i))
......@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
detection_priors: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for
the instance.
classes: an int Tensor of shape [batch_size, num_instances]
of instance classes.
mask_crop_size, mask_crop_size, 1]. This is the detection prior for the
instance.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
......@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
# Gather the logits with right input class.
if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(mask_logits, tf.expand_dims(classes, -1),
batch_dims=2)
mask_logits = tf.gather(
mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2)
else:
mask_logits = mask_logits[..., 0]
......@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
"""
(batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list()
features = tf.reshape(features, [batch_size * num_instances, height, width,
num_channels])
features = tf.reshape(
features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs):
features = self._class_conv[i](features)
features = self._class_norm_activation[i](features,
is_training=is_training)
features = self._class_norm_activation[i](
features, is_training=is_training)
mask_logits = self._class_predict(features)
mask_logits = tf.reshape(mask_logits, [batch_size, num_instances, height,
width, self._mask_num_classes])
mask_logits = tf.reshape(
mask_logits,
[batch_size, num_instances, height, width, self._mask_num_classes])
return mask_logits
......@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
activation=None,
padding='same',
name='fine-mask-class-%d' % i))
self._fine_class_bn.append(norm_activation(
name='fine-mask-class-%d-bn' % i))
self._fine_class_bn.append(
norm_activation(name='fine-mask-class-%d-bn' % i))
self._class_predict_conv = tf.keras.layers.Conv2D(
self._mask_num_classes,
......@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
https://arxiv.org/pdf/1904.03239.pdf
Args:
features: a float Tensor of shape
[batch_size, num_instances, mask_crop_size, mask_crop_size,
num_downsample_channels]. This is the instance feature crop.
mask_logits: a float Tensor of shape
[batch_size, num_instances, mask_crop_size, mask_crop_size] indicating
predicted mask logits.
classes: an int Tensor of shape [batch_size, num_instances]
of instance classes.
features: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop.
mask_logits: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size] indicating predicted mask logits.
classes: an int Tensor of shape [batch_size, num_instances] of instance
classes.
is_training: a bool indicating whether in training mode.
Returns:
......@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
mask_logits = self.decoder_net(features, is_training)
if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(mask_logits,
tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.gather(
mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2)
else:
mask_logits = mask_logits[..., 0]
......@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
"""
(batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list()
features = tf.reshape(features, [batch_size * num_instances, height, width,
num_channels])
features = tf.reshape(
features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs):
features = self._fine_class_conv[i](features)
features = self._fine_class_bn[i](features, is_training=is_training)
......@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
# Predict per-class instance masks.
mask_logits = self._class_predict_conv(features)
mask_logits = tf.reshape(mask_logits,
[batch_size, num_instances,
height * self.up_sample_factor,
width * self.up_sample_factor,
self._mask_num_classes])
mask_logits = tf.reshape(mask_logits, [
batch_size, num_instances, height * self.up_sample_factor,
width * self.up_sample_factor, self._mask_num_classes
])
return mask_logits
......@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function
import functools
import tensorflow as tf
......@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
layer.
init_zero: `bool` if True, initializes scale parameter of batch
normalization with 0. If False, initialize it with 1.
normalization with 0. If False, initialize it with 1.
fused: `bool` fused option in batch normalization.
use_actiation: `bool`, whether to add the optional activation layer after
the batch normalization layer.
......
......@@ -28,22 +28,23 @@ import tensorflow as tf
from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_ops
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class Resnet(object):
"""Class to build ResNet family model."""
def __init__(self,
resnet_depth,
activation='relu',
norm_activation=nn_ops.norm_activation_builder(
activation='relu'),
data_format='channels_last'):
def __init__(
self,
resnet_depth,
activation='relu',
norm_activation=nn_ops.norm_activation_builder(activation='relu'),
data_format='channels_last'):
"""ResNet initialization function.
Args:
resnet_depth: `int` depth of ResNet backbone model.
norm_activation: an operation that includes a normalization layer
followed by an optional activation layer.
norm_activation: an operation that includes a normalization layer followed
by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last for `[batch, height, width, channels]`.
"""
......@@ -58,24 +59,45 @@ class Resnet(object):
self._data_format = data_format
model_params = {
10: {'block': self.residual_block, 'layers': [1, 1, 1, 1]},
18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]},
34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]},
50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]},
101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]},
152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]},
200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]}
10: {
'block': self.residual_block,
'layers': [1, 1, 1, 1]
},
18: {
'block': self.residual_block,
'layers': [2, 2, 2, 2]
},
34: {
'block': self.residual_block,
'layers': [3, 4, 6, 3]
},
50: {
'block': self.bottleneck_block,
'layers': [3, 4, 6, 3]
},
101: {
'block': self.bottleneck_block,
'layers': [3, 4, 23, 3]
},
152: {
'block': self.bottleneck_block,
'layers': [3, 8, 36, 3]
},
200: {
'block': self.bottleneck_block,
'layers': [3, 24, 36, 3]
}
}
if resnet_depth not in model_params:
valid_resnet_depths = ', '.join(
[str(depth) for depth in sorted(model_params.keys())])
raise ValueError(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'%(
valid_resnet_depths), self._resnet_depth)
'The resnet_depth should be in [%s]. Not a valid resnet_depth:' %
(valid_resnet_depths), self._resnet_depth)
params = model_params[resnet_depth]
self._resnet_fn = self.resnet_v1_generator(
params['block'], params['layers'])
self._resnet_fn = self.resnet_v1_generator(params['block'],
params['layers'])
def __call__(self, inputs, is_training=None):
"""Returns the ResNet model for a given size and number of output classes.
......@@ -98,10 +120,10 @@ class Resnet(object):
"""Pads the input along the spatial dimensions independently of input size.
Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or
`[batch, height, width, channels]` depending on `data_format`.
inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch,
height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or max_pool2d`
operations. Should be a positive integer.
operations. Should be a positive integer.
Returns:
A padded `Tensor` of the same `data_format` with size either intact
......@@ -160,14 +182,15 @@ class Resnet(object):
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
`True` for the first block of a block group, which may change the
number of filters and the resolution.
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
The output `Tensor` of the block.
"""
......@@ -185,8 +208,9 @@ class Resnet(object):
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1)
inputs = self._norm_activation(use_activation=False, init_zero=True)(
inputs, is_training=is_training)
inputs = self._norm_activation(
use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut)
......@@ -201,13 +225,13 @@ class Resnet(object):
Args:
inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters.
the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input.
downsample the input.
use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually
`True` for the first block of a block group, which may change the
number of filters and the resolution.
shortcut (versus the default identity shortcut). This is usually `True`
for the first block of a block group, which may change the number of
filters and the resolution.
is_training: `bool` if True, the model is in training mode.
Returns:
......@@ -233,8 +257,9 @@ class Resnet(object):
inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
inputs = self._norm_activation(use_activation=False, init_zero=True)(
inputs, is_training=is_training)
inputs = self._norm_activation(
use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut)
......@@ -248,7 +273,7 @@ class Resnet(object):
block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input.
greater than 1, this layer will downsample the input.
name: `str`name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode.
......@@ -256,8 +281,8 @@ class Resnet(object):
The output `Tensor` of the block layer.
"""
# Only the first block per block_group uses projection shortcut and strides.
inputs = block_fn(inputs, filters, strides, use_projection=True,
is_training=is_training)
inputs = block_fn(
inputs, filters, strides, use_projection=True, is_training=is_training)
for _ in range(1, blocks):
inputs = block_fn(inputs, filters, 1, is_training=is_training)
......@@ -269,7 +294,7 @@ class Resnet(object):
Args:
block_fn: `function` for the block to use within the model. Either
`residual_block` or `bottleneck_block`.
`residual_block` or `bottleneck_block`.
layers: list of 4 `int`s denoting the number of blocks to include in each
of the 4 block groups. Each group consists of blocks that take inputs of
the same resolution.
......@@ -293,17 +318,37 @@ class Resnet(object):
inputs = tf.identity(inputs, 'initial_max_pool')
c2 = self.block_group(
inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
strides=1, name='block_group1', is_training=is_training)
inputs=inputs,
filters=64,
block_fn=block_fn,
blocks=layers[0],
strides=1,
name='block_group1',
is_training=is_training)
c3 = self.block_group(
inputs=c2, filters=128, block_fn=block_fn, blocks=layers[1],
strides=2, name='block_group2', is_training=is_training)
inputs=c2,
filters=128,
block_fn=block_fn,
blocks=layers[1],
strides=2,
name='block_group2',
is_training=is_training)
c4 = self.block_group(
inputs=c3, filters=256, block_fn=block_fn, blocks=layers[2],
strides=2, name='block_group3', is_training=is_training)
inputs=c3,
filters=256,
block_fn=block_fn,
blocks=layers[2],
strides=2,
name='block_group3',
is_training=is_training)
c5 = self.block_group(
inputs=c4, filters=512, block_fn=block_fn, blocks=layers[3],
strides=2, name='block_group4', is_training=is_training)
inputs=c4,
filters=512,
block_fn=block_fn,
blocks=layers[3],
strides=2,
name='block_group4',
is_training=is_training)
return {2: c2, 3: c3, 4: c4, 5: c5}
return model
......@@ -21,6 +21,7 @@ from __future__ import print_function
import abc
import functools
import re
import tensorflow as tf
from official.vision.detection.modeling import checkpoint_utils
from official.vision.detection.modeling import learning_rates
......@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
# frozen_variable_prefix: a regex string specifying the prefix pattern of
# the frozen variables' names.
filtered_variables = [
v for v in variables
if not frozen_variable_prefix or
v for v in variables if not frozen_variable_prefix or
not re.match(frozen_variable_prefix, v.name)
]
return filtered_variables
......@@ -115,8 +115,8 @@ class Model(object):
def weight_decay_loss(self, trainable_variables):
reg_variables = [
v for v in trainable_variables
if self._regularization_var_regex is None
or re.match(self._regularization_var_regex, v.name)
if self._regularization_var_regex is None or
re.match(self._regularization_var_regex, v.name)
]
return self._l2_weight_decay * tf.add_n(
......
......@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x
"""Util functions for loading checkpoints.
Especially for loading Tensorflow 1.x
checkpoint to Tensorflow 2.x (keras) model.
"""
......@@ -20,18 +22,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import re
from absl import logging
import tensorflow as tf
def _build_assignment_map(keras_model,
prefix='',
skip_variables_regex=None,
var_to_shape_map=None):
prefix='',
skip_variables_regex=None,
var_to_shape_map=None):
"""Compute an assignment mapping for loading older checkpoints into a Keras
model. Variable names are remapped from the original TPUEstimator model to
the new Keras name.
......@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
"""
assignment_map = {}
checkpoint_names = None
if var_to_shape_map:
checkpoint_names = list(filter(
lambda x: not x.endswith('Momentum') and not x.endswith(
'global_step'), var_to_shape_map.keys()))
checkpoint_names = list(
filter(
lambda x: not x.endswith('Momentum') and not x.endswith(
'global_step'), var_to_shape_map.keys()))
for var in keras_model.variables:
var_name = var.name
......@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
"""Returns scaffold function to restore parameters from v1 checkpoint.
Args:
checkpoint_path: path of the checkpoint folder or file.
Example 1: '/path/to/model_dir/'
Example 2: '/path/to/model.ckpt-22500'
prefix: prefix in the variable name to be removed for alignment with names in
the checkpoint.
skip_regex: regular expression to match the names of variables that
do not need to be assigned.
skip_regex: regular expression to match the names of variables that do not
need to be assigned.
Returns:
Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model.
......@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
var_to_shape_map=var_to_shape_map)
if not vars_to_load:
raise ValueError('Variables to load is empty.')
tf.compat.v1.train.init_from_checkpoint(checkpoint_path,
vars_to_load)
tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)
return _restore_checkpoint_fn
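
For reference, a small sketch of the name filtering performed in _build_assignment_map above: optimizer slot variables (ending in "Momentum") and the step counter are dropped from the checkpoint's variable map before remapping (variable names here are hypothetical):

var_to_shape_map = {
    'resnet50/conv2d/kernel': [7, 7, 3, 64],
    'resnet50/conv2d/kernel/Momentum': [7, 7, 3, 64],
    'global_step': [],
}
checkpoint_names = [
    name for name in var_to_shape_map
    if not name.endswith('Momentum') and not name.endswith('global_step')
]
print(checkpoint_names)  # ['resnet50/conv2d/kernel']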
......@@ -25,7 +25,8 @@ import tensorflow as tf
from official.modeling.hyperparams import params_dict
class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
class StepLearningRateWithLinearWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor."""
def __init__(self, total_steps, params):
......@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
return {'_params': self._params.as_dict()}
class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule):
class CosineLearningRateWithLinearWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor."""
def __init__(self, total_steps, params):
......
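
For context, a minimal sketch of a cosine learning rate with linear warmup, matching the class name above; the real schedule reads total_steps and its warmup/initial-rate settings from the params_dict config and may differ in detail:

import math

def cosine_lr_with_linear_warmup(step, total_steps, warmup_steps, init_lr):
  """Linearly ramps to init_lr, then decays along a half cosine."""
  if step < warmup_steps:
    return init_lr * step / max(1, warmup_steps)
  progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
  return 0.5 * init_lr * (1.0 + math.cos(math.pi * progress))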
......@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
box_targets = tf.where(
tf.tile(
tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
[1, 1, 4]),
tf.zeros_like(box_targets),
box_targets)
[1, 1, 4]), tf.zeros_like(box_targets), box_targets)
model_outputs.update({
'class_targets': matched_gt_classes,
'box_targets': box_targets,
......@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
mask_outputs),
})
else:
model_outputs.update({
'detection_masks': tf.nn.sigmoid(mask_outputs)
})
model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)})
return model_outputs
......@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
required_output_fields = ['class_outputs', 'box_outputs']
for field in required_output_fields:
if field not in outputs:
raise ValueError('"%s" is missing in outputs, requried %s found %s'
%(field, required_output_fields, outputs.keys()))
raise ValueError('"%s" is missing in outputs, requried %s found %s' %
(field, required_output_fields, outputs.keys()))
predictions = {
'image_info': labels['image_info'],
'num_detections': outputs['num_detections'],
......