Commit 88253ce5 authored by Hongkun Yu, committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 326286926
parent 52371ffe
...@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored", ...@@ -93,9 +93,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
Args: Args:
distribution_strategy: a string specifying which distribution strategy to distribution_strategy: a string specifying which distribution strategy to
use. Accepted values are "off", "one_device", "mirrored", use. Accepted values are "off", "one_device", "mirrored",
"parameter_server", "multi_worker_mirrored", and "tpu" -- case insensitive. "parameter_server", "multi_worker_mirrored", and "tpu" -- case
"off" means not to use Distribution Strategy; "tpu" means to use insensitive. "off" means not to use Distribution Strategy; "tpu" means to
TPUStrategy using `tpu_address`. use TPUStrategy using `tpu_address`.
num_gpus: Number of GPUs to run this model. num_gpus: Number of GPUs to run this model.
all_reduce_alg: Optional. Specifies which algorithm to use when performing all_reduce_alg: Optional. Specifies which algorithm to use when performing
all-reduce. For `MirroredStrategy`, valid values are "nccl" and all-reduce. For `MirroredStrategy`, valid values are "nccl" and
...@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored", ...@@ -104,8 +104,9 @@ def get_distribution_strategy(distribution_strategy="mirrored",
device topology. device topology.
num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce` num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`. or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
tpu_address: Optional. String that represents TPU to connect to. Must not tpu_address: Optional. String that represents TPU to connect to. Must not be
be None if `distribution_strategy` is set to `tpu`. None if `distribution_strategy` is set to `tpu`.
Returns: Returns:
tf.distribute.DistributionStrategy object. tf.distribute.DistributionStrategy object.
Raises: Raises:
...@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored", ...@@ -119,9 +120,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
distribution_strategy = distribution_strategy.lower() distribution_strategy = distribution_strategy.lower()
if distribution_strategy == "off": if distribution_strategy == "off":
if num_gpus > 1: if num_gpus > 1:
raise ValueError( raise ValueError("When {} GPUs are specified, distribution_strategy "
"When {} GPUs are specified, distribution_strategy " "flag cannot be set to `off`.".format(num_gpus))
"flag cannot be set to `off`.".format(num_gpus))
return None return None
if distribution_strategy == "tpu": if distribution_strategy == "tpu":
...@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored", ...@@ -153,8 +153,8 @@ def get_distribution_strategy(distribution_strategy="mirrored",
if distribution_strategy == "parameter_server": if distribution_strategy == "parameter_server":
return tf.distribute.experimental.ParameterServerStrategy() return tf.distribute.experimental.ParameterServerStrategy()
raise ValueError( raise ValueError("Unrecognized Distribution Strategy: %r" %
"Unrecognized Distribution Strategy: %r" % distribution_strategy) distribution_strategy)
def configure_cluster(worker_hosts=None, task_index=-1): def configure_cluster(worker_hosts=None, task_index=-1):
...@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1): ...@@ -168,8 +168,9 @@ def configure_cluster(worker_hosts=None, task_index=-1):
""" """
tf_config = json.loads(os.environ.get("TF_CONFIG", "{}")) tf_config = json.loads(os.environ.get("TF_CONFIG", "{}"))
if tf_config: if tf_config:
num_workers = (len(tf_config["cluster"].get("chief", [])) + num_workers = (
len(tf_config["cluster"].get("worker", []))) len(tf_config["cluster"].get("chief", [])) +
len(tf_config["cluster"].get("worker", [])))
elif worker_hosts: elif worker_hosts:
workers = worker_hosts.split(",") workers = worker_hosts.split(",")
num_workers = len(workers) num_workers = len(workers)
...@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1): ...@@ -180,7 +181,10 @@ def configure_cluster(worker_hosts=None, task_index=-1):
"cluster": { "cluster": {
"worker": workers "worker": workers
}, },
"task": {"type": "worker", "index": task_index} "task": {
"type": "worker",
"index": task_index
}
}) })
else: else:
num_workers = 1 num_workers = 1
......
...@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils ...@@ -25,6 +25,7 @@ from official.utils.misc import distribution_utils
class GetDistributionStrategyTest(tf.test.TestCase): class GetDistributionStrategyTest(tf.test.TestCase):
"""Tests for get_distribution_strategy.""" """Tests for get_distribution_strategy."""
def test_one_device_strategy_cpu(self): def test_one_device_strategy_cpu(self):
ds = distribution_utils.get_distribution_strategy(num_gpus=0) ds = distribution_utils.get_distribution_strategy(num_gpus=0)
self.assertEquals(ds.num_replicas_in_sync, 1) self.assertEquals(ds.num_replicas_in_sync, 1)
...@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase): ...@@ -45,5 +46,5 @@ class GetDistributionStrategyTest(tf.test.TestCase):
self.assertIn('GPU', device) self.assertIn('GPU', device)
if __name__ == "__main__": if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -25,7 +25,6 @@ import time ...@@ -25,7 +25,6 @@ import time
from absl import logging from absl import logging
import tensorflow as tf import tensorflow as tf
from tensorflow.python.eager import monitoring from tensorflow.python.eager import monitoring
global_batch_size_gauge = monitoring.IntGauge( global_batch_size_gauge = monitoring.IntGauge(
...@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback): ...@@ -121,8 +120,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
# Record the timestamp of the first global step # Record the timestamp of the first global step
if not self.timestamp_log: if not self.timestamp_log:
self.timestamp_log.append(BatchTimestamp(self.global_steps, self.timestamp_log.append(
self.start_time)) BatchTimestamp(self.global_steps, self.start_time))
def on_batch_end(self, batch, logs=None): def on_batch_end(self, batch, logs=None):
"""Records elapse time of the batch and calculates examples per second.""" """Records elapse time of the batch and calculates examples per second."""
...@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False): ...@@ -175,12 +174,12 @@ def set_session_config(enable_xla=False):
if enable_xla: if enable_xla:
tf.config.optimizer.set_jit(True) tf.config.optimizer.set_jit(True)
# TODO(hongkuny): remove set_config_v2 globally. # TODO(hongkuny): remove set_config_v2 globally.
set_config_v2 = set_session_config set_config_v2 = set_session_config
def set_gpu_thread_mode_and_count(gpu_thread_mode, def set_gpu_thread_mode_and_count(gpu_thread_mode, datasets_num_private_threads,
datasets_num_private_threads,
num_gpus, per_gpu_thread_count): num_gpus, per_gpu_thread_count):
"""Set GPU thread mode and count, and adjust dataset threads count.""" """Set GPU thread mode and count, and adjust dataset threads count."""
cpu_count = multiprocessing.cpu_count() cpu_count = multiprocessing.cpu_count()
...@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode, ...@@ -190,10 +189,8 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
per_gpu_thread_count = per_gpu_thread_count or 2 per_gpu_thread_count = per_gpu_thread_count or 2
os.environ['TF_GPU_THREAD_MODE'] = gpu_thread_mode os.environ['TF_GPU_THREAD_MODE'] = gpu_thread_mode
os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count) os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count)
logging.info('TF_GPU_THREAD_COUNT: %s', logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT'])
os.environ['TF_GPU_THREAD_COUNT']) logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])
logging.info('TF_GPU_THREAD_MODE: %s',
os.environ['TF_GPU_THREAD_MODE'])
# Limit data preprocessing threadpool to CPU cores minus number of total GPU # Limit data preprocessing threadpool to CPU cores minus number of total GPU
# private threads and memory copy threads. # private threads and memory copy threads.
...@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode, ...@@ -201,7 +198,6 @@ def set_gpu_thread_mode_and_count(gpu_thread_mode,
num_runtime_threads = num_gpus num_runtime_threads = num_gpus
if not datasets_num_private_threads: if not datasets_num_private_threads:
datasets_num_private_threads = min( datasets_num_private_threads = min(
cpu_count - total_gpu_thread_count - num_runtime_threads, cpu_count - total_gpu_thread_count - num_runtime_threads, num_gpus * 8)
num_gpus * 8)
logging.info('Set datasets_num_private_threads to %s', logging.info('Set datasets_num_private_threads to %s',
datasets_num_private_threads) datasets_num_private_threads)
...@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric): ...@@ -58,9 +58,12 @@ def past_stop_threshold(stop_threshold, eval_metric):
return False return False
def generate_synthetic_data( def generate_synthetic_data(input_shape,
input_shape, input_value=0, input_dtype=None, label_shape=None, input_value=0,
label_value=0, label_dtype=None): input_dtype=None,
label_shape=None,
label_value=0,
label_dtype=None):
"""Create a repeating dataset with constant values. """Create a repeating dataset with constant values.
Args: Args:
......
...@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase): ...@@ -51,19 +51,19 @@ class PastStopThresholdTest(tf.test.TestCase):
def test_past_stop_threshold_not_number(self): def test_past_stop_threshold_not_number(self):
"""Tests for error conditions.""" """Tests for error conditions."""
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", 1) model_helpers.past_stop_threshold('str', 1)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", tf.constant(5)) model_helpers.past_stop_threshold('str', tf.constant(5))
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold("str", "another") model_helpers.past_stop_threshold('str', 'another')
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(0, None) model_helpers.past_stop_threshold(0, None)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(0.7, "str") model_helpers.past_stop_threshold(0.7, 'str')
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
model_helpers.past_stop_threshold(tf.constant(4), None) model_helpers.past_stop_threshold(tf.constant(4), None)
...@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase): ...@@ -74,12 +74,13 @@ class SyntheticDataTest(tf.test.TestCase):
def test_generate_synethetic_data(self): def test_generate_synethetic_data(self):
input_element, label_element = tf.compat.v1.data.make_one_shot_iterator( input_element, label_element = tf.compat.v1.data.make_one_shot_iterator(
model_helpers.generate_synthetic_data(input_shape=tf.TensorShape([5]), model_helpers.generate_synthetic_data(
input_value=123, input_shape=tf.TensorShape([5]),
input_dtype=tf.float32, input_value=123,
label_shape=tf.TensorShape([]), input_dtype=tf.float32,
label_value=456, label_shape=tf.TensorShape([]),
label_dtype=tf.int32)).get_next() label_value=456,
label_dtype=tf.int32)).get_next()
with self.session() as sess: with self.session() as sess:
for n in range(5): for n in range(5):
...@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase): ...@@ -102,8 +103,13 @@ class SyntheticDataTest(tf.test.TestCase):
def test_generate_nested_data(self): def test_generate_nested_data(self):
d = model_helpers.generate_synthetic_data( d = model_helpers.generate_synthetic_data(
input_shape={'a': tf.TensorShape([2]), input_shape={
'b': {'c': tf.TensorShape([3]), 'd': tf.TensorShape([])}}, 'a': tf.TensorShape([2]),
'b': {
'c': tf.TensorShape([3]),
'd': tf.TensorShape([])
}
},
input_value=1.1) input_value=1.1)
element = tf.compat.v1.data.make_one_shot_iterator(d).get_next() element = tf.compat.v1.data.make_one_shot_iterator(d).get_next()
...@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase): ...@@ -121,5 +127,5 @@ class SyntheticDataTest(tf.test.TestCase):
self.assertAllClose(inp['b']['d'], 1.1) self.assertAllClose(inp['b']['d'], 1.1)
if __name__ == "__main__": if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase): ...@@ -31,18 +31,20 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0') @registry.register(collection, 'functions/func_0')
def func_test(): def func_test():
pass pass
self.assertEqual(
registry.lookup(collection, 'functions/func_0'), func_test) self.assertEqual(registry.lookup(collection, 'functions/func_0'), func_test)
@registry.register(collection, 'classes/cls_0') @registry.register(collection, 'classes/cls_0')
class ClassRegistryKey: class ClassRegistryKey:
pass pass
self.assertEqual( self.assertEqual(
registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey) registry.lookup(collection, 'classes/cls_0'), ClassRegistryKey)
@registry.register(collection, ClassRegistryKey) @registry.register(collection, ClassRegistryKey)
class ClassRegistryValue: class ClassRegistryValue:
pass pass
self.assertEqual( self.assertEqual(
registry.lookup(collection, ClassRegistryKey), ClassRegistryValue) registry.lookup(collection, ClassRegistryKey), ClassRegistryValue)
...@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase): ...@@ -52,12 +54,15 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0') @registry.register(collection, 'functions/func_0')
def func_test0(): def func_test0():
pass pass
@registry.register(collection, 'func_1') @registry.register(collection, 'func_1')
def func_test1(): def func_test1():
pass pass
@registry.register(collection, func_test1) @registry.register(collection, func_test1)
def func_test2(): def func_test2():
pass pass
expected_collection = { expected_collection = {
'functions': { 'functions': {
'func_0': func_test0, 'func_0': func_test0,
...@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase): ...@@ -73,10 +78,13 @@ class RegistryTest(tf.test.TestCase):
@registry.register(collection, 'functions/func_0') @registry.register(collection, 'functions/func_0')
def func_test0(): # pylint: disable=unused-variable def func_test0(): # pylint: disable=unused-variable
pass pass
with self.assertRaises(KeyError): with self.assertRaises(KeyError):
@registry.register(collection, 'functions/func_0/sub_func') @registry.register(collection, 'functions/func_0/sub_func')
def func_test1(): # pylint: disable=unused-variable def func_test1(): # pylint: disable=unused-variable
pass pass
with self.assertRaises(LookupError): with self.assertRaises(LookupError):
registry.lookup(collection, 'non-exist') registry.lookup(collection, 'non-exist')
......
...@@ -12,8 +12,7 @@ ...@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Helper code to run complete models from within python. """Helper code to run complete models from within python."""
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core ...@@ -31,7 +30,11 @@ from official.utils.flags import core as flags_core
@flagsaver.flagsaver @flagsaver.flagsaver
def run_synthetic(main, tmp_root, extra_flags=None, synth=True, train_epochs=1, def run_synthetic(main,
tmp_root,
extra_flags=None,
synth=True,
train_epochs=1,
epochs_between_evals=1): epochs_between_evals=1):
"""Performs a minimal run of a model. """Performs a minimal run of a model.
......
...@@ -19,6 +19,7 @@ from __future__ import division ...@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import collections import collections
import tensorflow as tf import tensorflow as tf
from official.vision.detection.utils.object_detection import argmax_matcher from official.vision.detection.utils.object_detection import argmax_matcher
from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler from official.vision.detection.utils.object_detection import balanced_positive_negative_sampler
...@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner ...@@ -31,30 +32,25 @@ from official.vision.detection.utils.object_detection import target_assigner
class Anchor(object): class Anchor(object):
"""Anchor class for anchor-based object detectors.""" """Anchor class for anchor-based object detectors."""
def __init__(self, def __init__(self, min_level, max_level, num_scales, aspect_ratios,
min_level, anchor_size, image_size):
max_level,
num_scales,
aspect_ratios,
anchor_size,
image_size):
"""Constructs multiscale anchors. """Constructs multiscale anchors.
Args: Args:
min_level: integer number of minimum level of the output feature pyramid. min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid. max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added num_scales: integer number representing intermediate scales added on each
on each level. For instance, num_scales=2 adds one additional level. For instance, num_scales=2 adds one additional intermediate
intermediate anchor scales [2^0, 2^0.5] on each level. anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of float numbers representing the aspect ratio anchors aspect_ratios: list of float numbers representing the aspect ratio anchors
added on each level. The number indicates the ratio of width to height. added on each level. The number indicates the ratio of width to height.
For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
scale level. scale level.
anchor_size: float number representing the scale of size of the base anchor_size: float number representing the scale of size of the base
anchor to the feature stride 2^level. anchor to the feature stride 2^level.
image_size: a list of integer numbers or Tensors representing image_size: a list of integer numbers or Tensors representing [height,
[height, width] of the input image size. The image_size should be width] of the input image size. The image_size should be divisible by the
divisible by the largest feature stride 2^max_level. largest feature stride 2^max_level.
""" """
self.min_level = min_level self.min_level = min_level
self.max_level = max_level self.max_level = max_level
...@@ -76,11 +72,11 @@ class Anchor(object): ...@@ -76,11 +72,11 @@ class Anchor(object):
boxes_l = [] boxes_l = []
for scale in range(self.num_scales): for scale in range(self.num_scales):
for aspect_ratio in self.aspect_ratios: for aspect_ratio in self.aspect_ratios:
stride = 2 ** level stride = 2**level
intermediate_scale = 2 ** (scale / float(self.num_scales)) intermediate_scale = 2**(scale / float(self.num_scales))
base_anchor_size = self.anchor_size * stride * intermediate_scale base_anchor_size = self.anchor_size * stride * intermediate_scale
aspect_x = aspect_ratio ** 0.5 aspect_x = aspect_ratio**0.5
aspect_y = aspect_ratio ** -0.5 aspect_y = aspect_ratio**-0.5
half_anchor_size_x = base_anchor_size * aspect_x / 2.0 half_anchor_size_x = base_anchor_size * aspect_x / 2.0
half_anchor_size_y = base_anchor_size * aspect_y / 2.0 half_anchor_size_y = base_anchor_size * aspect_y / 2.0
x = tf.range(stride / 2, self.image_size[1], stride) x = tf.range(stride / 2, self.image_size[1], stride)
...@@ -89,8 +85,10 @@ class Anchor(object): ...@@ -89,8 +85,10 @@ class Anchor(object):
xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32) xv = tf.cast(tf.reshape(xv, [-1]), dtype=tf.float32)
yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32) yv = tf.cast(tf.reshape(yv, [-1]), dtype=tf.float32)
# Tensor shape Nx4. # Tensor shape Nx4.
boxes = tf.stack([yv - half_anchor_size_y, xv - half_anchor_size_x, boxes = tf.stack([
yv + half_anchor_size_y, xv + half_anchor_size_x], yv - half_anchor_size_y, xv - half_anchor_size_x,
yv + half_anchor_size_y, xv + half_anchor_size_x
],
axis=1) axis=1)
boxes_l.append(boxes) boxes_l.append(boxes)
# Concat anchors on the same level to tensor shape NxAx4. # Concat anchors on the same level to tensor shape NxAx4.
...@@ -104,11 +102,11 @@ class Anchor(object): ...@@ -104,11 +102,11 @@ class Anchor(object):
unpacked_labels = collections.OrderedDict() unpacked_labels = collections.OrderedDict()
count = 0 count = 0
for level in range(self.min_level, self.max_level + 1): for level in range(self.min_level, self.max_level + 1):
feat_size_y = tf.cast(self.image_size[0] / 2 ** level, tf.int32) feat_size_y = tf.cast(self.image_size[0] / 2**level, tf.int32)
feat_size_x = tf.cast(self.image_size[1] / 2 ** level, tf.int32) feat_size_x = tf.cast(self.image_size[1] / 2**level, tf.int32)
steps = feat_size_y * feat_size_x * self.anchors_per_location steps = feat_size_y * feat_size_x * self.anchors_per_location
unpacked_labels[level] = tf.reshape( unpacked_labels[level] = tf.reshape(labels[count:count + steps],
labels[count:count + steps], [feat_size_y, feat_size_x, -1]) [feat_size_y, feat_size_x, -1])
count += steps count += steps
return unpacked_labels return unpacked_labels
...@@ -124,10 +122,7 @@ class Anchor(object): ...@@ -124,10 +122,7 @@ class Anchor(object):
class AnchorLabeler(object): class AnchorLabeler(object):
"""Labeler for dense object detector.""" """Labeler for dense object detector."""
def __init__(self, def __init__(self, anchor, match_threshold=0.5, unmatched_threshold=0.5):
anchor,
match_threshold=0.5,
unmatched_threshold=0.5):
"""Constructs anchor labeler to assign labels to anchors. """Constructs anchor labeler to assign labels to anchors.
Args: Args:
...@@ -161,6 +156,7 @@ class AnchorLabeler(object): ...@@ -161,6 +156,7 @@ class AnchorLabeler(object):
For each row, it stores [y0, x0, y1, x1] for four corners of a box. For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes. classes.
Returns: Returns:
cls_targets_dict: ordered dictionary with keys cls_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
...@@ -205,11 +201,14 @@ class AnchorLabeler(object): ...@@ -205,11 +201,14 @@ class AnchorLabeler(object):
class RpnAnchorLabeler(AnchorLabeler): class RpnAnchorLabeler(AnchorLabeler):
"""Labeler for Region Proposal Network.""" """Labeler for Region Proposal Network."""
def __init__(self, anchor, match_threshold=0.7, def __init__(self,
unmatched_threshold=0.3, rpn_batch_size_per_im=256, anchor,
match_threshold=0.7,
unmatched_threshold=0.3,
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5): rpn_fg_fraction=0.5):
AnchorLabeler.__init__(self, anchor, match_threshold=0.7, AnchorLabeler.__init__(
unmatched_threshold=0.3) self, anchor, match_threshold=0.7, unmatched_threshold=0.3)
self._rpn_batch_size_per_im = rpn_batch_size_per_im self._rpn_batch_size_per_im = rpn_batch_size_per_im
self._rpn_fg_fraction = rpn_fg_fraction self._rpn_fg_fraction = rpn_fg_fraction
...@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler): ...@@ -219,11 +218,12 @@ class RpnAnchorLabeler(AnchorLabeler):
This function performs subsampling for foreground (fg) and background (bg) This function performs subsampling for foreground (fg) and background (bg)
anchors. anchors.
Args: Args:
match_results: An integer tensor with shape [N] representing the match_results: An integer tensor with shape [N] representing the matching
matching results of anchors. (1) match_results[i]>=0, results of anchors. (1) match_results[i]>=0, meaning that column i is
meaning that column i is matched with row match_results[i]. matched with row match_results[i]. (2) match_results[i]=-1, meaning that
(2) match_results[i]=-1, meaning that column i is not matched. column i is not matched. (3) match_results[i]=-2, meaning that column i
(3) match_results[i]=-2, meaning that column i is ignored. is ignored.
Returns: Returns:
score_targets: an integer tensor with a shape of [N]. score_targets: an integer tensor with a shape of [N].
(1) score_targets[i]=1, the anchor is a positive sample. (1) score_targets[i]=1, the anchor is a positive sample.
...@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler): ...@@ -241,8 +241,7 @@ class RpnAnchorLabeler(AnchorLabeler):
indicator = tf.greater(match_results, -2) indicator = tf.greater(match_results, -2)
labels = tf.greater(match_results, -1) labels = tf.greater(match_results, -1)
samples = sampler.subsample( samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
indicator, self._rpn_batch_size_per_im, labels)
positive_labels = tf.where( positive_labels = tf.where(
tf.logical_and(samples, labels), tf.logical_and(samples, labels),
tf.constant(2, dtype=tf.int32, shape=match_results.shape), tf.constant(2, dtype=tf.int32, shape=match_results.shape),
...@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler): ...@@ -253,8 +252,8 @@ class RpnAnchorLabeler(AnchorLabeler):
tf.constant(0, dtype=tf.int32, shape=match_results.shape)) tf.constant(0, dtype=tf.int32, shape=match_results.shape))
ignore_labels = tf.fill(match_results.shape, -1) ignore_labels = tf.fill(match_results.shape, -1)
return (ignore_labels + positive_labels + negative_labels, return (ignore_labels + positive_labels + negative_labels, positive_labels,
positive_labels, negative_labels) negative_labels)
def label_anchors(self, gt_boxes, gt_labels): def label_anchors(self, gt_boxes, gt_labels):
"""Labels anchors with ground truth inputs. """Labels anchors with ground truth inputs.
...@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler): ...@@ -264,6 +263,7 @@ class RpnAnchorLabeler(AnchorLabeler):
For each row, it stores [y0, x0, y1, x1] for four corners of a box. For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: A integer tensor with shape [N, 1] representing groundtruth gt_labels: A integer tensor with shape [N, 1] representing groundtruth
classes. classes.
Returns: Returns:
score_targets_dict: ordered dictionary with keys score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with [min_level, min_level+1, ..., max_level]. The values are tensor with
......
...@@ -91,7 +91,8 @@ class InputFn(object): ...@@ -91,7 +91,8 @@ class InputFn(object):
dataset = dataset.repeat() dataset = dataset.repeat()
dataset = dataset.interleave( dataset = dataset.interleave(
map_func=self._dataset_fn, cycle_length=32, map_func=self._dataset_fn,
cycle_length=32,
num_parallel_calls=tf.data.experimental.AUTOTUNE) num_parallel_calls=tf.data.experimental.AUTOTUNE)
if self._is_training: if self._is_training:
......
...@@ -79,9 +79,9 @@ class Parser(object): ...@@ -79,9 +79,9 @@ class Parser(object):
output_size should be divided by the largest feature stride 2^max_level. output_size should be divided by the largest feature stride 2^max_level.
min_level: `int` number of minimum level of the output feature pyramid. min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid. max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added num_scales: `int` number representing intermediate scales added on each
on each level. For instances, num_scales=2 adds one additional level. For instances, num_scales=2 adds one additional intermediate
intermediate anchor scales [2^0, 2^0.5] on each level. anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect raito aspect_ratios: `list` of float numbers representing the aspect raito
anchors added on each level. The number indicates the ratio of width to anchors added on each level. The number indicates the ratio of width to
height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
...@@ -94,8 +94,8 @@ class Parser(object): ...@@ -94,8 +94,8 @@ class Parser(object):
unmatched_threshold: `float` number between 0 and 1 representing the unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative. with a score below the threshold is labeled negative.
aug_rand_hflip: `bool`, if True, augment training with random aug_rand_hflip: `bool`, if True, augment training with random horizontal
horizontal flip. flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training. data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for aug_scale_max: `float`, the maximum scale applied to `output_size` for
...@@ -109,8 +109,8 @@ class Parser(object): ...@@ -109,8 +109,8 @@ class Parser(object):
max_num_instances: `int` number of maximum number of instances in an max_num_instances: `int` number of maximum number of instances in an
image. The groundtruth data will be padded to `max_num_instances`. image. The groundtruth data will be padded to `max_num_instances`.
use_bfloat16: `bool`, if True, cast output image to tf.bfloat16. use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
or prediction with groundtruths in the outputs. prediction with groundtruths in the outputs.
""" """
self._mode = mode self._mode = mode
self._max_num_instances = max_num_instances self._max_num_instances = max_num_instances
...@@ -232,8 +232,8 @@ class Parser(object): ...@@ -232,8 +232,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image( image, image_info = input_utils.resize_and_crop_image(
image, image,
self._output_size, self._output_size,
padded_size=input_utils.compute_padded_size( padded_size=input_utils.compute_padded_size(self._output_size,
self._output_size, 2 ** self._max_level), 2**self._max_level),
aug_scale_min=self._aug_scale_min, aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max) aug_scale_max=self._aug_scale_max)
image_height, image_width, _ = image.get_shape().as_list() image_height, image_width, _ = image.get_shape().as_list()
...@@ -241,22 +241,21 @@ class Parser(object): ...@@ -241,22 +241,21 @@ class Parser(object):
# Resizes and crops boxes. # Resizes and crops boxes.
image_scale = image_info[2, :] image_scale = image_info[2, :]
offset = image_info[3, :] offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes( boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
boxes, image_scale, image_info[1, :], offset) image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros. # Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes) indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices) boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices) classes = tf.gather(classes, indices)
# Assigns anchors. # Assigns anchors.
input_anchor = anchor.Anchor( input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._min_level, self._max_level, self._num_scales, self._num_scales, self._aspect_ratios,
self._aspect_ratios, self._anchor_size, (image_height, image_width)) self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler( anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
input_anchor, self._match_threshold, self._unmatched_threshold) self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes, boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16. # If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16: if self._use_bfloat16:
...@@ -292,8 +291,8 @@ class Parser(object): ...@@ -292,8 +291,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image( image, image_info = input_utils.resize_and_crop_image(
image, image,
self._output_size, self._output_size,
padded_size=input_utils.compute_padded_size( padded_size=input_utils.compute_padded_size(self._output_size,
self._output_size, 2 ** self._max_level), 2**self._max_level),
aug_scale_min=1.0, aug_scale_min=1.0,
aug_scale_max=1.0) aug_scale_max=1.0)
image_height, image_width, _ = image.get_shape().as_list() image_height, image_width, _ = image.get_shape().as_list()
...@@ -301,22 +300,21 @@ class Parser(object): ...@@ -301,22 +300,21 @@ class Parser(object):
# Resizes and crops boxes. # Resizes and crops boxes.
image_scale = image_info[2, :] image_scale = image_info[2, :]
offset = image_info[3, :] offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes( boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
boxes, image_scale, image_info[1, :], offset) image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros. # Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes) indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices) boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices) classes = tf.gather(classes, indices)
# Assigns anchors. # Assigns anchors.
input_anchor = anchor.Anchor( input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._min_level, self._max_level, self._num_scales, self._num_scales, self._aspect_ratios,
self._aspect_ratios, self._anchor_size, (image_height, image_width)) self._anchor_size, (image_height, image_width))
anchor_labeler = anchor.AnchorLabeler( anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
input_anchor, self._match_threshold, self._unmatched_threshold) self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes, boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16. # If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16: if self._use_bfloat16:
...@@ -324,18 +322,24 @@ class Parser(object): ...@@ -324,18 +322,24 @@ class Parser(object):
# Sets up groundtruth data for evaluation. # Sets up groundtruth data for evaluation.
groundtruths = { groundtruths = {
'source_id': data['source_id'], 'source_id':
'num_groundtrtuhs': tf.shape(data['groundtruth_classes']), data['source_id'],
'image_info': image_info, 'num_groundtrtuhs':
'boxes': box_utils.denormalize_boxes( tf.shape(data['groundtruth_classes']),
data['groundtruth_boxes'], image_shape), 'image_info':
'classes': data['groundtruth_classes'], image_info,
'areas': data['groundtruth_area'], 'boxes':
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32), box_utils.denormalize_boxes(data['groundtruth_boxes'], image_shape),
'classes':
data['groundtruth_classes'],
'areas':
data['groundtruth_area'],
'is_crowds':
tf.cast(data['groundtruth_is_crowd'], tf.int32),
} }
groundtruths['source_id'] = process_source_id(groundtruths['source_id']) groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
groundtruths = pad_groundtruths_to_fixed_size( groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
groundtruths, self._max_num_instances) self._max_num_instances)
# Packs labels for model_fn outputs. # Packs labels for model_fn outputs.
labels = { labels = {
...@@ -361,8 +365,8 @@ class Parser(object): ...@@ -361,8 +365,8 @@ class Parser(object):
image, image_info = input_utils.resize_and_crop_image( image, image_info = input_utils.resize_and_crop_image(
image, image,
self._output_size, self._output_size,
padded_size=input_utils.compute_padded_size( padded_size=input_utils.compute_padded_size(self._output_size,
self._output_size, 2 ** self._max_level), 2**self._max_level),
aug_scale_min=1.0, aug_scale_min=1.0,
aug_scale_max=1.0) aug_scale_max=1.0)
image_height, image_width, _ = image.get_shape().as_list() image_height, image_width, _ = image.get_shape().as_list()
...@@ -372,9 +376,9 @@ class Parser(object): ...@@ -372,9 +376,9 @@ class Parser(object):
image = tf.cast(image, dtype=tf.bfloat16) image = tf.cast(image, dtype=tf.bfloat16)
# Compute Anchor boxes. # Compute Anchor boxes.
input_anchor = anchor.Anchor( input_anchor = anchor.Anchor(self._min_level, self._max_level,
self._min_level, self._max_level, self._num_scales, self._num_scales, self._aspect_ratios,
self._aspect_ratios, self._anchor_size, (image_height, image_width)) self._anchor_size, (image_height, image_width))
labels = { labels = {
'anchor_boxes': input_anchor.multilevel_boxes, 'anchor_boxes': input_anchor.multilevel_boxes,
...@@ -384,8 +388,8 @@ class Parser(object): ...@@ -384,8 +388,8 @@ class Parser(object):
# in labels. # in labels.
if self._mode == ModeKeys.PREDICT_WITH_GT: if self._mode == ModeKeys.PREDICT_WITH_GT:
# Converts boxes from normalized coordinates to pixel coordinates. # Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_utils.denormalize_boxes( boxes = box_utils.denormalize_boxes(data['groundtruth_boxes'],
data['groundtruth_boxes'], image_shape) image_shape)
groundtruths = { groundtruths = {
'source_id': data['source_id'], 'source_id': data['source_id'],
'num_detections': tf.shape(data['groundtruth_classes']), 'num_detections': tf.shape(data['groundtruth_classes']),
...@@ -395,8 +399,8 @@ class Parser(object): ...@@ -395,8 +399,8 @@ class Parser(object):
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32), 'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
} }
groundtruths['source_id'] = process_source_id(groundtruths['source_id']) groundtruths['source_id'] = process_source_id(groundtruths['source_id'])
groundtruths = pad_groundtruths_to_fixed_size( groundtruths = pad_groundtruths_to_fixed_size(groundtruths,
groundtruths, self._max_num_instances) self._max_num_instances)
labels['groundtruths'] = groundtruths labels['groundtruths'] = groundtruths
# Computes training objective for evaluation loss. # Computes training objective for evaluation loss.
...@@ -404,18 +408,17 @@ class Parser(object): ...@@ -404,18 +408,17 @@ class Parser(object):
image_scale = image_info[2, :] image_scale = image_info[2, :]
offset = image_info[3, :] offset = image_info[3, :]
boxes = input_utils.resize_and_crop_boxes( boxes = input_utils.resize_and_crop_boxes(boxes, image_scale,
boxes, image_scale, image_info[1, :], offset) image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros. # Filters out ground truth boxes that are all zeros.
indices = box_utils.get_non_empty_box_indices(boxes) indices = box_utils.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices) boxes = tf.gather(boxes, indices)
# Assigns anchors. # Assigns anchors.
anchor_labeler = anchor.AnchorLabeler( anchor_labeler = anchor.AnchorLabeler(input_anchor, self._match_threshold,
input_anchor, self._match_threshold, self._unmatched_threshold) self._unmatched_threshold)
(cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors( (cls_targets, box_targets, num_positives) = anchor_labeler.label_anchors(
boxes, boxes, tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
labels['cls_targets'] = cls_targets labels['cls_targets'] = cls_targets
labels['box_targets'] = box_targets labels['box_targets'] = box_targets
labels['num_positives'] = num_positives labels['num_positives'] = num_positives
......
...@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin ...@@ -21,7 +21,6 @@ Weicheng Kuo, Anelia Angelova, Jitendra Malik, Tsung-Yi Lin
ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors. ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors.
arXiv:1904.03239. arXiv:1904.03239.
""" """
import tensorflow as tf import tensorflow as tf
from official.vision.detection.dataloader import anchor from official.vision.detection.dataloader import anchor
......
...@@ -32,6 +32,7 @@ from __future__ import print_function ...@@ -32,6 +32,7 @@ from __future__ import print_function
import atexit import atexit
import tempfile import tempfile
import numpy as np import numpy as np
from absl import logging from absl import logging
from pycocotools import cocoeval from pycocotools import cocoeval
...@@ -197,22 +198,21 @@ class COCOEvaluator(object): ...@@ -197,22 +198,21 @@ class COCOEvaluator(object):
"""Update and aggregate detection results and groundtruth data. """Update and aggregate detection results and groundtruth data.
Args: Args:
predictions: a dictionary of numpy arrays including the fields below. predictions: a dictionary of numpy arrays including the fields below. See
See different parsers under `../dataloader` for more details. different parsers under `../dataloader` for more details.
Required fields: Required fields:
- source_id: a numpy array of int or string of shape [batch_size]. - source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of - image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2]. float of shape [batch_size, 4, 2].
- num_detections: a numpy array of - num_detections: a numpy array of int of shape [batch_size].
int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4]. - detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K]. - detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K]. - detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields: Optional fields:
- detection_masks: a numpy array of float of shape - detection_masks: a numpy array of float of shape [batch_size, K,
[batch_size, K, mask_height, mask_width]. mask_height, mask_width].
groundtruths: a dictionary of numpy arrays including the fields below. groundtruths: a dictionary of numpy arrays including the fields below. See
See also different parsers under `../dataloader` for more details. also different parsers under `../dataloader` for more details.
Required fields: Required fields:
- source_id: a numpy array of int or string of shape [batch_size]. - source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size]. - height: a numpy array of int of shape [batch_size].
...@@ -222,12 +222,12 @@ class COCOEvaluator(object): ...@@ -222,12 +222,12 @@ class COCOEvaluator(object):
- classes: a numpy array of int of shape [batch_size, K]. - classes: a numpy array of int of shape [batch_size, K].
Optional fields: Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the - is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd. field is absent, it is assumed that this instance is not crowd.
- areas: a numy array of float of shape [batch_size, K]. If the - areas: a numy array of float of shape [batch_size, K]. If the field
field is absent, the area is calculated using either boxes or is absent, the area is calculated using either boxes or masks
masks depending on which one is available. depending on which one is available.
- masks: a numpy array of float of shape - masks: a numpy array of float of shape [batch_size, K, mask_height,
[batch_size, K, mask_height, mask_width], mask_width],
Raises: Raises:
ValueError: if the required prediction or groundtruth fields are not ValueError: if the required prediction or groundtruth fields are not
...@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator): ...@@ -318,8 +318,7 @@ class ShapeMaskCOCOEvaluator(COCOEvaluator):
metrics = np.hstack((coco_metrics, mcoco_eval.stats)) metrics = np.hstack((coco_metrics, mcoco_eval.stats))
else: else:
mask_coco_metrics = mcoco_eval.category_stats mask_coco_metrics = mcoco_eval.category_stats
val_catg_idx = np.isin(mcoco_eval.params.catIds, val_catg_idx = np.isin(mcoco_eval.params.catIds, self._eval_categories)
self._eval_categories)
# Gather the valid evaluation of the eval categories. # Gather the valid evaluation of the eval categories.
if np.any(val_catg_idx): if np.any(val_catg_idx):
mean_val_metrics = [] mean_val_metrics = []
......
...@@ -23,6 +23,7 @@ import functools ...@@ -23,6 +23,7 @@ import functools
import pprint import pprint
# pylint: disable=g-bad-import-order # pylint: disable=g-bad-import-order
# Import libraries
import tensorflow as tf import tensorflow as tf
from absl import app from absl import app
......
...@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops ...@@ -31,17 +31,17 @@ from official.vision.detection.ops import spatial_transform_ops
class RpnHead(tf.keras.layers.Layer): class RpnHead(tf.keras.layers.Layer):
"""Region Proposal Network head.""" """Region Proposal Network head."""
def __init__(self, def __init__(
min_level, self,
max_level, min_level,
anchors_per_location, max_level,
num_convs=2, anchors_per_location,
num_filters=256, num_convs=2,
use_separable_conv=False, num_filters=256,
activation='relu', use_separable_conv=False,
use_batch_norm=True, activation='relu',
norm_activation=nn_ops.norm_activation_builder( use_batch_norm=True,
activation='relu')): norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Region Proposal Network head. """Initialize params to build Region Proposal Network head.
Args: Args:
...@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer): ...@@ -57,8 +57,8 @@ class RpnHead(tf.keras.layers.Layer):
is used. is used.
activation: activation function. Support 'relu' and 'swish'. activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added. use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
""" """
self._min_level = min_level self._min_level = min_level
self._max_level = max_level self._max_level = max_level
...@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer): ...@@ -140,17 +140,17 @@ class RpnHead(tf.keras.layers.Layer):
class FastrcnnHead(tf.keras.layers.Layer): class FastrcnnHead(tf.keras.layers.Layer):
"""Fast R-CNN box head.""" """Fast R-CNN box head."""
def __init__(self, def __init__(
num_classes, self,
num_convs=0, num_classes,
num_filters=256, num_convs=0,
use_separable_conv=False, num_filters=256,
num_fcs=2, use_separable_conv=False,
fc_dims=1024, num_fcs=2,
activation='relu', fc_dims=1024,
use_batch_norm=True, activation='relu',
norm_activation=nn_ops.norm_activation_builder( use_batch_norm=True,
activation='relu')): norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN box head. """Initialize params to build Fast R-CNN box head.
Args: Args:
...@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer): ...@@ -167,8 +167,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
layers. layers.
activation: activation function. Support 'relu' and 'swish'. activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added. use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
""" """
self._num_classes = num_classes self._num_classes = num_classes
...@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer): ...@@ -207,7 +207,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
strides=(1, 1), strides=(1, 1),
padding='same', padding='same',
dilation_rate=(1, 1), dilation_rate=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op), activation=(None
if self._use_batch_norm else self._activation_op),
name='conv_{}'.format(i))) name='conv_{}'.format(i)))
if self._use_batch_norm: if self._use_batch_norm:
self._conv_bn_ops.append(self._norm_activation()) self._conv_bn_ops.append(self._norm_activation())
...@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer): ...@@ -218,7 +219,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
self._fc_ops.append( self._fc_ops.append(
tf.keras.layers.Dense( tf.keras.layers.Dense(
units=self._fc_dims, units=self._fc_dims,
activation=(None if self._use_batch_norm else self._activation_op), activation=(None
if self._use_batch_norm else self._activation_op),
name='fc{}'.format(i))) name='fc{}'.format(i)))
if self._use_batch_norm: if self._use_batch_norm:
self._fc_bn_ops.append(self._norm_activation(fused=False)) self._fc_bn_ops.append(self._norm_activation(fused=False))
...@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer): ...@@ -238,8 +240,8 @@ class FastrcnnHead(tf.keras.layers.Layer):
"""Box and class branches for the Mask-RCNN model. """Box and class branches for the Mask-RCNN model.
Args: Args:
roi_features: A ROI feature tensor of shape roi_features: A ROI feature tensor of shape [batch_size, num_rois,
[batch_size, num_rois, height_l, width_l, num_filters]. height_l, width_l, num_filters].
is_training: `boolean`, if True if model is in training mode. is_training: `boolean`, if True if model is in training mode.
Returns: Returns:
...@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer): ...@@ -277,16 +279,16 @@ class FastrcnnHead(tf.keras.layers.Layer):
class MaskrcnnHead(tf.keras.layers.Layer): class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask R-CNN head.""" """Mask R-CNN head."""
def __init__(self, def __init__(
num_classes, self,
mask_target_size, num_classes,
num_convs=4, mask_target_size,
num_filters=256, num_convs=4,
use_separable_conv=False, num_filters=256,
activation='relu', use_separable_conv=False,
use_batch_norm=True, activation='relu',
norm_activation=nn_ops.norm_activation_builder( use_batch_norm=True,
activation='relu')): norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build Fast R-CNN head. """Initialize params to build Fast R-CNN head.
Args: Args:
...@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer): ...@@ -300,8 +302,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
is used. is used.
activation: activation function. Support 'relu' and 'swish'. activation: activation function. Support 'relu' and 'swish'.
use_batch_norm: 'bool', indicating whether batchnorm layers are added. use_batch_norm: 'bool', indicating whether batchnorm layers are added.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
""" """
self._num_classes = num_classes self._num_classes = num_classes
self._mask_target_size = mask_target_size self._mask_target_size = mask_target_size
...@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer): ...@@ -336,7 +338,8 @@ class MaskrcnnHead(tf.keras.layers.Layer):
strides=(1, 1), strides=(1, 1),
padding='same', padding='same',
dilation_rate=(1, 1), dilation_rate=(1, 1),
activation=(None if self._use_batch_norm else self._activation_op), activation=(None
if self._use_batch_norm else self._activation_op),
name='mask-conv-l%d' % i)) name='mask-conv-l%d' % i))
self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose( self._mask_conv_transpose = tf.keras.layers.Conv2DTranspose(
self._num_filters, self._num_filters,
...@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer): ...@@ -353,10 +356,10 @@ class MaskrcnnHead(tf.keras.layers.Layer):
"""Mask branch for the Mask-RCNN model. """Mask branch for the Mask-RCNN model.
Args: Args:
roi_features: A ROI feature tensor of shape roi_features: A ROI feature tensor of shape [batch_size, num_rois,
[batch_size, num_rois, height_l, width_l, num_filters]. height_l, width_l, num_filters].
class_indices: a Tensor of shape [batch_size, num_rois], indicating class_indices: a Tensor of shape [batch_size, num_rois], indicating which
which class the ROI is. class the ROI is.
is_training: `boolean`, if True if model is in training mode. is_training: `boolean`, if True if model is in training mode.
Returns: Returns:
...@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer): ...@@ -415,16 +418,16 @@ class MaskrcnnHead(tf.keras.layers.Layer):
class RetinanetHead(object): class RetinanetHead(object):
"""RetinaNet head.""" """RetinaNet head."""
def __init__(self, def __init__(
min_level, self,
max_level, min_level,
num_classes, max_level,
anchors_per_location, num_classes,
num_convs=4, anchors_per_location,
num_filters=256, num_convs=4,
use_separable_conv=False, num_filters=256,
norm_activation=nn_ops.norm_activation_builder( use_separable_conv=False,
activation='relu')): norm_activation=nn_ops.norm_activation_builder(activation='relu')):
"""Initialize params to build RetinaNet head. """Initialize params to build RetinaNet head.
Args: Args:
...@@ -437,8 +440,8 @@ class RetinanetHead(object): ...@@ -437,8 +440,8 @@ class RetinanetHead(object):
num_filters: `int` number of filters used in the head architecture. num_filters: `int` number of filters used in the head architecture.
use_separable_conv: `bool` to indicate whether to use separable use_separable_conv: `bool` to indicate whether to use separable
convoluation. convoluation.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
""" """
self._min_level = min_level self._min_level = min_level
self._max_level = max_level self._max_level = max_level
...@@ -600,12 +603,8 @@ class RetinanetHead(object): ...@@ -600,12 +603,8 @@ class RetinanetHead(object):
class ShapemaskPriorHead(object): class ShapemaskPriorHead(object):
"""ShapeMask Prior head.""" """ShapeMask Prior head."""
def __init__(self, def __init__(self, num_classes, num_downsample_channels, mask_crop_size,
num_classes, use_category_for_mask, shape_prior_path):
num_downsample_channels,
mask_crop_size,
use_category_for_mask,
shape_prior_path):
"""Initialize params to build RetinaNet head. """Initialize params to build RetinaNet head.
Args: Args:
...@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object): ...@@ -632,12 +631,12 @@ class ShapemaskPriorHead(object):
Args: Args:
fpn_features: a dictionary of FPN features. fpn_features: a dictionary of FPN features.
boxes: a float tensor of shape [batch_size, num_instances, 4] boxes: a float tensor of shape [batch_size, num_instances, 4] representing
representing the tight gt boxes from dataloader/detection. the tight gt boxes from dataloader/detection.
outer_boxes: a float tensor of shape [batch_size, num_instances, 4] outer_boxes: a float tensor of shape [batch_size, num_instances, 4]
representing the loose gt boxes from dataloader/detection. representing the loose gt boxes from dataloader/detection.
classes: a int Tensor of shape [batch_size, num_instances] classes: a int Tensor of shape [batch_size, num_instances] of instance
of instance classes. classes.
is_training: training mode or not. is_training: training mode or not.
Returns: Returns:
...@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object): ...@@ -658,8 +657,9 @@ class ShapemaskPriorHead(object):
shape_priors = self._get_priors() shape_priors = self._get_priors()
# Get uniform priors for each outer box. # Get uniform priors for each outer box.
uniform_priors = tf.ones([batch_size, num_instances, self._mask_crop_size, uniform_priors = tf.ones([
self._mask_crop_size]) batch_size, num_instances, self._mask_crop_size, self._mask_crop_size
])
uniform_priors = spatial_transform_ops.crop_mask_in_target_box( uniform_priors = spatial_transform_ops.crop_mask_in_target_box(
uniform_priors, boxes, outer_boxes, self._mask_crop_size) uniform_priors, boxes, outer_boxes, self._mask_crop_size)
...@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object): ...@@ -668,8 +668,9 @@ class ShapemaskPriorHead(object):
tf.cast(instance_features, tf.float32), uniform_priors, classes) tf.cast(instance_features, tf.float32), uniform_priors, classes)
instance_priors = tf.gather(shape_priors, classes) instance_priors = tf.gather(shape_priors, classes)
instance_priors *= tf.expand_dims(tf.expand_dims( instance_priors *= tf.expand_dims(
tf.cast(prior_distribution, tf.float32), axis=-1), axis=-1) tf.expand_dims(tf.cast(prior_distribution, tf.float32), axis=-1),
axis=-1)
instance_priors = tf.reduce_sum(instance_priors, axis=2) instance_priors = tf.reduce_sum(instance_priors, axis=2)
detection_priors = spatial_transform_ops.crop_mask_in_target_box( detection_priors = spatial_transform_ops.crop_mask_in_target_box(
instance_priors, boxes, outer_boxes, self._mask_crop_size) instance_priors, boxes, outer_boxes, self._mask_crop_size)
...@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object): ...@@ -688,8 +689,10 @@ class ShapemaskPriorHead(object):
# If prior path does not exist, do not use priors, i.e., pirors equal to # If prior path does not exist, do not use priors, i.e., pirors equal to
# uniform empty 32x32 patch. # uniform empty 32x32 patch.
self._num_clusters = 1 self._num_clusters = 1
priors = tf.zeros([self._mask_num_classes, self._num_clusters, priors = tf.zeros([
self._mask_crop_size, self._mask_crop_size]) self._mask_num_classes, self._num_clusters, self._mask_crop_size,
self._mask_crop_size
])
return priors return priors
def _classify_shape_priors(self, features, uniform_priors, classes): def _classify_shape_priors(self, features, uniform_priors, classes):
...@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object): ...@@ -699,12 +702,12 @@ class ShapemaskPriorHead(object):
category. category.
Args: Args:
features: A float Tensor of shape [batch_size, num_instances, features: A float Tensor of shape [batch_size, num_instances, mask_size,
mask_size, mask_size, num_channels]. mask_size, num_channels].
uniform_priors: A float Tensor of shape [batch_size, num_instances, uniform_priors: A float Tensor of shape [batch_size, num_instances,
mask_size, mask_size] representing the uniform detection priors. mask_size, mask_size] representing the uniform detection priors.
classes: A int Tensor of shape [batch_size, num_instances] classes: A int Tensor of shape [batch_size, num_instances] of detection
of detection class ids. class ids.
Returns: Returns:
prior_distribution: A float Tensor of shape prior_distribution: A float Tensor of shape
...@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object): ...@@ -719,10 +722,11 @@ class ShapemaskPriorHead(object):
features = tf.reduce_mean(features, axis=(2, 3)) features = tf.reduce_mean(features, axis=(2, 3))
logits = tf.keras.layers.Dense( logits = tf.keras.layers.Dense(
self._mask_num_classes * self._num_clusters, self._mask_num_classes * self._num_clusters,
kernel_initializer=tf.random_normal_initializer(stddev=0.01))(features) kernel_initializer=tf.random_normal_initializer(stddev=0.01))(
logits = tf.reshape(logits, features)
[batch_size, num_instances, logits = tf.reshape(
self._mask_num_classes, self._num_clusters]) logits,
[batch_size, num_instances, self._mask_num_classes, self._num_clusters])
if self._use_category_for_mask: if self._use_category_for_mask:
logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2) logits = tf.gather(logits, tf.expand_dims(classes, axis=-1), batch_dims=2)
logits = tf.squeeze(logits, axis=2) logits = tf.squeeze(logits, axis=2)
...@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object): ...@@ -752,8 +756,8 @@ class ShapemaskCoarsemaskHead(object):
use_category_for_mask: use class information in mask branch. use_category_for_mask: use class information in mask branch.
num_convs: `int` number of stacked convolution before the last prediction num_convs: `int` number of stacked convolution before the last prediction
layer. layer.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
""" """
self._mask_num_classes = num_classes if use_category_for_mask else 1 self._mask_num_classes = num_classes if use_category_for_mask else 1
self._use_category_for_mask = use_category_for_mask self._use_category_for_mask = use_category_for_mask
...@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object): ...@@ -769,13 +773,15 @@ class ShapemaskCoarsemaskHead(object):
self._class_norm_activation = [] self._class_norm_activation = []
for i in range(self._num_convs): for i in range(self._num_convs):
self._class_conv.append(tf.keras.layers.Conv2D( self._class_conv.append(
self._num_downsample_channels, tf.keras.layers.Conv2D(
kernel_size=(3, 3), self._num_downsample_channels,
bias_initializer=tf.zeros_initializer(), kernel_size=(3, 3),
kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), bias_initializer=tf.zeros_initializer(),
padding='same', kernel_initializer=tf.keras.initializers.RandomNormal(
name='coarse-mask-class-%d' % i)) stddev=0.01),
padding='same',
name='coarse-mask-class-%d' % i))
self._class_norm_activation.append( self._class_norm_activation.append(
norm_activation(name='coarse-mask-class-%d-bn' % i)) norm_activation(name='coarse-mask-class-%d-bn' % i))
...@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object): ...@@ -800,10 +806,10 @@ class ShapemaskCoarsemaskHead(object):
mask_crop_size, mask_crop_size, num_downsample_channels]. This is the mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
instance feature crop. instance feature crop.
detection_priors: a float Tensor of shape [batch_size, num_instances, detection_priors: a float Tensor of shape [batch_size, num_instances,
mask_crop_size, mask_crop_size, 1]. This is the detection prior for mask_crop_size, mask_crop_size, 1]. This is the detection prior for the
the instance. instance.
classes: a int Tensor of shape [batch_size, num_instances] classes: a int Tensor of shape [batch_size, num_instances] of instance
of instance classes. classes.
is_training: a bool indicating whether in training mode. is_training: a bool indicating whether in training mode.
Returns: Returns:
...@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object): ...@@ -820,8 +826,8 @@ class ShapemaskCoarsemaskHead(object):
# Gather the logits with right input class. # Gather the logits with right input class.
if self._use_category_for_mask: if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3]) mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(mask_logits, tf.expand_dims(classes, -1), mask_logits = tf.gather(
batch_dims=2) mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2) mask_logits = tf.squeeze(mask_logits, axis=2)
else: else:
mask_logits = mask_logits[..., 0] mask_logits = mask_logits[..., 0]
...@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object): ...@@ -841,16 +847,17 @@ class ShapemaskCoarsemaskHead(object):
""" """
(batch_size, num_instances, height, width, (batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list() num_channels) = features.get_shape().as_list()
features = tf.reshape(features, [batch_size * num_instances, height, width, features = tf.reshape(
num_channels]) features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs): for i in range(self._num_convs):
features = self._class_conv[i](features) features = self._class_conv[i](features)
features = self._class_norm_activation[i](features, features = self._class_norm_activation[i](
is_training=is_training) features, is_training=is_training)
mask_logits = self._class_predict(features) mask_logits = self._class_predict(features)
mask_logits = tf.reshape(mask_logits, [batch_size, num_instances, height, mask_logits = tf.reshape(
width, self._mask_num_classes]) mask_logits,
[batch_size, num_instances, height, width, self._mask_num_classes])
return mask_logits return mask_logits
...@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object): ...@@ -907,8 +914,8 @@ class ShapemaskFinemaskHead(object):
activation=None, activation=None,
padding='same', padding='same',
name='fine-mask-class-%d' % i)) name='fine-mask-class-%d' % i))
self._fine_class_bn.append(norm_activation( self._fine_class_bn.append(
name='fine-mask-class-%d-bn' % i)) norm_activation(name='fine-mask-class-%d-bn' % i))
self._class_predict_conv = tf.keras.layers.Conv2D( self._class_predict_conv = tf.keras.layers.Conv2D(
self._mask_num_classes, self._mask_num_classes,
...@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object): ...@@ -926,14 +933,13 @@ class ShapemaskFinemaskHead(object):
https://arxiv.org/pdf/1904.03239.pdf https://arxiv.org/pdf/1904.03239.pdf
Args: Args:
features: a float Tensor of shape features: a float Tensor of shape [batch_size, num_instances,
[batch_size, num_instances, mask_crop_size, mask_crop_size, mask_crop_size, mask_crop_size, num_downsample_channels]. This is the
num_downsample_channels]. This is the instance feature crop. instance feature crop.
mask_logits: a float Tensor of shape mask_logits: a float Tensor of shape [batch_size, num_instances,
[batch_size, num_instances, mask_crop_size, mask_crop_size] indicating mask_crop_size, mask_crop_size] indicating predicted mask logits.
predicted mask logits. classes: a int Tensor of shape [batch_size, num_instances] of instance
classes: a int Tensor of shape [batch_size, num_instances] classes.
of instance classes.
is_training: a bool indicating whether in training mode. is_training: a bool indicating whether in training mode.
Returns: Returns:
...@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object): ...@@ -960,8 +966,8 @@ class ShapemaskFinemaskHead(object):
mask_logits = self.decoder_net(features, is_training) mask_logits = self.decoder_net(features, is_training)
if self._use_category_for_mask: if self._use_category_for_mask:
mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3]) mask_logits = tf.transpose(mask_logits, [0, 1, 4, 2, 3])
mask_logits = tf.gather(mask_logits, mask_logits = tf.gather(
tf.expand_dims(classes, -1), batch_dims=2) mask_logits, tf.expand_dims(classes, -1), batch_dims=2)
mask_logits = tf.squeeze(mask_logits, axis=2) mask_logits = tf.squeeze(mask_logits, axis=2)
else: else:
mask_logits = mask_logits[..., 0] mask_logits = mask_logits[..., 0]
...@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object): ...@@ -982,8 +988,8 @@ class ShapemaskFinemaskHead(object):
""" """
(batch_size, num_instances, height, width, (batch_size, num_instances, height, width,
num_channels) = features.get_shape().as_list() num_channels) = features.get_shape().as_list()
features = tf.reshape(features, [batch_size * num_instances, height, width, features = tf.reshape(
num_channels]) features, [batch_size * num_instances, height, width, num_channels])
for i in range(self._num_convs): for i in range(self._num_convs):
features = self._fine_class_conv[i](features) features = self._fine_class_conv[i](features)
features = self._fine_class_bn[i](features, is_training=is_training) features = self._fine_class_bn[i](features, is_training=is_training)
...@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object): ...@@ -994,9 +1000,8 @@ class ShapemaskFinemaskHead(object):
# Predict per-class instance masks. # Predict per-class instance masks.
mask_logits = self._class_predict_conv(features) mask_logits = self._class_predict_conv(features)
mask_logits = tf.reshape(mask_logits, mask_logits = tf.reshape(mask_logits, [
[batch_size, num_instances, batch_size, num_instances, height * self.up_sample_factor,
height * self.up_sample_factor, width * self.up_sample_factor, self._mask_num_classes
width * self.up_sample_factor, ])
self._mask_num_classes])
return mask_logits return mask_logits
...@@ -19,6 +19,7 @@ from __future__ import division ...@@ -19,6 +19,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import functools import functools
import tensorflow as tf import tensorflow as tf
...@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer): ...@@ -43,7 +44,7 @@ class NormActivation(tf.keras.layers.Layer):
GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization GraphKeys.TRAINABLE_VARIABLES. If False, freeze batch normalization
layer. layer.
init_zero: `bool` if True, initializes scale parameter of batch init_zero: `bool` if True, initializes scale parameter of batch
normalization with 0. If False, initialize it with 1. normalization with 0. If False, initialize it with 1.
fused: `bool` fused option in batch normalization. fused: `bool` fused option in batch normalization.
use_activation: `bool`, whether to add the optional activation layer after use_activation: `bool`, whether to add the optional activation layer after
the batch normalization layer. the batch normalization layer.
......
...@@ -28,22 +28,23 @@ import tensorflow as tf ...@@ -28,22 +28,23 @@ import tensorflow as tf
from official.vision.detection.modeling.architecture import keras_utils from official.vision.detection.modeling.architecture import keras_utils
from official.vision.detection.modeling.architecture import nn_ops from official.vision.detection.modeling.architecture import nn_ops
# TODO(b/140112644): Refactor the code with Keras style, i.e. build and call. # TODO(b/140112644): Refactor the code with Keras style, i.e. build and call.
class Resnet(object): class Resnet(object):
"""Class to build ResNet family model.""" """Class to build ResNet family model."""
def __init__(self, def __init__(
resnet_depth, self,
activation='relu', resnet_depth,
norm_activation=nn_ops.norm_activation_builder( activation='relu',
activation='relu'), norm_activation=nn_ops.norm_activation_builder(activation='relu'),
data_format='channels_last'): data_format='channels_last'):
"""ResNet initialization function. """ResNet initialization function.
Args: Args:
resnet_depth: `int` depth of ResNet backbone model. resnet_depth: `int` depth of ResNet backbone model.
norm_activation: an operation that includes a normalization layer norm_activation: an operation that includes a normalization layer followed
followed by an optional activation layer. by an optional activation layer.
data_format: `str` either "channels_first" for `[batch, channels, height, data_format: `str` either "channels_first" for `[batch, channels, height,
width]` or "channels_last" for `[batch, height, width, channels]`. width]` or "channels_last" for `[batch, height, width, channels]`.
""" """
...@@ -58,24 +59,45 @@ class Resnet(object): ...@@ -58,24 +59,45 @@ class Resnet(object):
self._data_format = data_format self._data_format = data_format
model_params = { model_params = {
10: {'block': self.residual_block, 'layers': [1, 1, 1, 1]}, 10: {
18: {'block': self.residual_block, 'layers': [2, 2, 2, 2]}, 'block': self.residual_block,
34: {'block': self.residual_block, 'layers': [3, 4, 6, 3]}, 'layers': [1, 1, 1, 1]
50: {'block': self.bottleneck_block, 'layers': [3, 4, 6, 3]}, },
101: {'block': self.bottleneck_block, 'layers': [3, 4, 23, 3]}, 18: {
152: {'block': self.bottleneck_block, 'layers': [3, 8, 36, 3]}, 'block': self.residual_block,
200: {'block': self.bottleneck_block, 'layers': [3, 24, 36, 3]} 'layers': [2, 2, 2, 2]
},
34: {
'block': self.residual_block,
'layers': [3, 4, 6, 3]
},
50: {
'block': self.bottleneck_block,
'layers': [3, 4, 6, 3]
},
101: {
'block': self.bottleneck_block,
'layers': [3, 4, 23, 3]
},
152: {
'block': self.bottleneck_block,
'layers': [3, 8, 36, 3]
},
200: {
'block': self.bottleneck_block,
'layers': [3, 24, 36, 3]
}
} }
if resnet_depth not in model_params: if resnet_depth not in model_params:
valid_resnet_depths = ', '.join( valid_resnet_depths = ', '.join(
[str(depth) for depth in sorted(model_params.keys())]) [str(depth) for depth in sorted(model_params.keys())])
raise ValueError( raise ValueError(
'The resnet_depth should be in [%s]. Not a valid resnet_depth:'%( 'The resnet_depth should be in [%s]. Not a valid resnet_depth:' %
valid_resnet_depths), self._resnet_depth) (valid_resnet_depths), self._resnet_depth)
params = model_params[resnet_depth] params = model_params[resnet_depth]
self._resnet_fn = self.resnet_v1_generator( self._resnet_fn = self.resnet_v1_generator(params['block'],
params['block'], params['layers']) params['layers'])
def __call__(self, inputs, is_training=None): def __call__(self, inputs, is_training=None):
"""Returns the ResNet model for a given size and number of output classes. """Returns the ResNet model for a given size and number of output classes.
...@@ -98,10 +120,10 @@ class Resnet(object): ...@@ -98,10 +120,10 @@ class Resnet(object):
"""Pads the input along the spatial dimensions independently of input size. """Pads the input along the spatial dimensions independently of input size.
Args: Args:
inputs: `Tensor` of size `[batch, channels, height, width]` or inputs: `Tensor` of size `[batch, channels, height, width]` or `[batch,
`[batch, height, width, channels]` depending on `data_format`. height, width, channels]` depending on `data_format`.
kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d` kernel_size: `int` kernel size to be used for `conv2d` or `max_pool2d`
operations. Should be a positive integer. operations. Should be a positive integer.
Returns: Returns:
A padded `Tensor` of the same `data_format` with size either intact A padded `Tensor` of the same `data_format` with size either intact
...@@ -160,14 +182,15 @@ class Resnet(object): ...@@ -160,14 +182,15 @@ class Resnet(object):
Args: Args:
inputs: `Tensor` of size `[batch, channels, height, width]`. inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters. the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input. downsample the input.
use_projection: `bool` for whether this block should use a projection use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually shortcut (versus the default identity shortcut). This is usually `True`
`True` for the first block of a block group, which may change the for the first block of a block group, which may change the number of
number of filters and the resolution. filters and the resolution.
is_training: `bool` if True, the model is in training mode. is_training: `bool` if True, the model is in training mode.
Returns: Returns:
The output `Tensor` of the block. The output `Tensor` of the block.
""" """
...@@ -185,8 +208,9 @@ class Resnet(object): ...@@ -185,8 +208,9 @@ class Resnet(object):
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1) inputs=inputs, filters=filters, kernel_size=3, strides=1)
inputs = self._norm_activation(use_activation=False, init_zero=True)( inputs = self._norm_activation(
inputs, is_training=is_training) use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut) return self._activation_op(inputs + shortcut)
...@@ -201,13 +225,13 @@ class Resnet(object): ...@@ -201,13 +225,13 @@ class Resnet(object):
Args: Args:
inputs: `Tensor` of size `[batch, channels, height, width]`. inputs: `Tensor` of size `[batch, channels, height, width]`.
filters: `int` number of filters for the first two convolutions. Note that filters: `int` number of filters for the first two convolutions. Note that
the third and final convolution will use 4 times as many filters. the third and final convolution will use 4 times as many filters.
strides: `int` block stride. If greater than 1, this block will ultimately strides: `int` block stride. If greater than 1, this block will ultimately
downsample the input. downsample the input.
use_projection: `bool` for whether this block should use a projection use_projection: `bool` for whether this block should use a projection
shortcut (versus the default identity shortcut). This is usually shortcut (versus the default identity shortcut). This is usually `True`
`True` for the first block of a block group, which may change the for the first block of a block group, which may change the number of
number of filters and the resolution. filters and the resolution.
is_training: `bool` if True, the model is in training mode. is_training: `bool` if True, the model is in training mode.
Returns: Returns:
...@@ -233,8 +257,9 @@ class Resnet(object): ...@@ -233,8 +257,9 @@ class Resnet(object):
inputs = self.conv2d_fixed_padding( inputs = self.conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1) inputs=inputs, filters=4 * filters, kernel_size=1, strides=1)
inputs = self._norm_activation(use_activation=False, init_zero=True)( inputs = self._norm_activation(
inputs, is_training=is_training) use_activation=False, init_zero=True)(
inputs, is_training=is_training)
return self._activation_op(inputs + shortcut) return self._activation_op(inputs + shortcut)
...@@ -248,7 +273,7 @@ class Resnet(object): ...@@ -248,7 +273,7 @@ class Resnet(object):
block_fn: `function` for the block to use within the model block_fn: `function` for the block to use within the model
blocks: `int` number of blocks contained in the layer. blocks: `int` number of blocks contained in the layer.
strides: `int` stride to use for the first convolution of the layer. If strides: `int` stride to use for the first convolution of the layer. If
greater than 1, this layer will downsample the input. greater than 1, this layer will downsample the input.
name: `str` name for the Tensor output of the block layer. name: `str` name for the Tensor output of the block layer.
is_training: `bool` if True, the model is in training mode. is_training: `bool` if True, the model is in training mode.
...@@ -256,8 +281,8 @@ class Resnet(object): ...@@ -256,8 +281,8 @@ class Resnet(object):
The output `Tensor` of the block layer. The output `Tensor` of the block layer.
""" """
# Only the first block per block_group uses projection shortcut and strides. # Only the first block per block_group uses projection shortcut and strides.
inputs = block_fn(inputs, filters, strides, use_projection=True, inputs = block_fn(
is_training=is_training) inputs, filters, strides, use_projection=True, is_training=is_training)
for _ in range(1, blocks): for _ in range(1, blocks):
inputs = block_fn(inputs, filters, 1, is_training=is_training) inputs = block_fn(inputs, filters, 1, is_training=is_training)
...@@ -269,7 +294,7 @@ class Resnet(object): ...@@ -269,7 +294,7 @@ class Resnet(object):
Args: Args:
block_fn: `function` for the block to use within the model. Either block_fn: `function` for the block to use within the model. Either
`residual_block` or `bottleneck_block`. `residual_block` or `bottleneck_block`.
layers: list of 4 `int`s denoting the number of blocks to include in each layers: list of 4 `int`s denoting the number of blocks to include in each
of the 4 block groups. Each group consists of blocks that take inputs of of the 4 block groups. Each group consists of blocks that take inputs of
the same resolution. the same resolution.
...@@ -293,17 +318,37 @@ class Resnet(object): ...@@ -293,17 +318,37 @@ class Resnet(object):
inputs = tf.identity(inputs, 'initial_max_pool') inputs = tf.identity(inputs, 'initial_max_pool')
c2 = self.block_group( c2 = self.block_group(
inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0], inputs=inputs,
strides=1, name='block_group1', is_training=is_training) filters=64,
block_fn=block_fn,
blocks=layers[0],
strides=1,
name='block_group1',
is_training=is_training)
c3 = self.block_group( c3 = self.block_group(
inputs=c2, filters=128, block_fn=block_fn, blocks=layers[1], inputs=c2,
strides=2, name='block_group2', is_training=is_training) filters=128,
block_fn=block_fn,
blocks=layers[1],
strides=2,
name='block_group2',
is_training=is_training)
c4 = self.block_group( c4 = self.block_group(
inputs=c3, filters=256, block_fn=block_fn, blocks=layers[2], inputs=c3,
strides=2, name='block_group3', is_training=is_training) filters=256,
block_fn=block_fn,
blocks=layers[2],
strides=2,
name='block_group3',
is_training=is_training)
c5 = self.block_group( c5 = self.block_group(
inputs=c4, filters=512, block_fn=block_fn, blocks=layers[3], inputs=c4,
strides=2, name='block_group4', is_training=is_training) filters=512,
block_fn=block_fn,
blocks=layers[3],
strides=2,
name='block_group4',
is_training=is_training)
return {2: c2, 3: c3, 4: c4, 5: c5} return {2: c2, 3: c3, 4: c4, 5: c5}
return model return model
...@@ -21,6 +21,7 @@ from __future__ import print_function ...@@ -21,6 +21,7 @@ from __future__ import print_function
import abc import abc
import functools import functools
import re import re
import tensorflow as tf import tensorflow as tf
from official.vision.detection.modeling import checkpoint_utils from official.vision.detection.modeling import checkpoint_utils
from official.vision.detection.modeling import learning_rates from official.vision.detection.modeling import learning_rates
...@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix): ...@@ -42,8 +43,7 @@ def _make_filter_trainable_variables_fn(frozen_variable_prefix):
# frozen_variable_prefix: a regex string specifying the prefix pattern of # frozen_variable_prefix: a regex string specifying the prefix pattern of
# the frozen variables' names. # the frozen variables' names.
filtered_variables = [ filtered_variables = [
v for v in variables v for v in variables if not frozen_variable_prefix or
if not frozen_variable_prefix or
not re.match(frozen_variable_prefix, v.name) not re.match(frozen_variable_prefix, v.name)
] ]
return filtered_variables return filtered_variables
...@@ -115,8 +115,8 @@ class Model(object): ...@@ -115,8 +115,8 @@ class Model(object):
def weight_decay_loss(self, trainable_variables): def weight_decay_loss(self, trainable_variables):
reg_variables = [ reg_variables = [
v for v in trainable_variables v for v in trainable_variables
if self._regularization_var_regex is None if self._regularization_var_regex is None or
or re.match(self._regularization_var_regex, v.name) re.match(self._regularization_var_regex, v.name)
] ]
return self._l2_weight_decay * tf.add_n( return self._l2_weight_decay * tf.add_n(
......
...@@ -12,7 +12,9 @@ ...@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""Util functions for loading checkpoints. Especially for loading Tensorflow 1.x """Util functions for loading checkpoints.
Especially for loading Tensorflow 1.x
checkpoint to Tensorflow 2.x (keras) model. checkpoint to Tensorflow 2.x (keras) model.
""" """
...@@ -20,18 +22,19 @@ from __future__ import absolute_import ...@@ -20,18 +22,19 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import re import re
from absl import logging from absl import logging
import tensorflow as tf import tensorflow as tf
def _build_assignment_map(keras_model, def _build_assignment_map(keras_model,
prefix='', prefix='',
skip_variables_regex=None, skip_variables_regex=None,
var_to_shape_map=None): var_to_shape_map=None):
"""Compute an assignment mapping for loading older checkpoints into a Keras """Compute an assignment mapping for loading older checkpoints into a Keras
model. Variable names are remapped from the original TPUEstimator model to model. Variable names are remapped from the original TPUEstimator model to
the new Keras name. the new Keras name.
...@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model, ...@@ -48,12 +51,12 @@ def _build_assignment_map(keras_model,
""" """
assignment_map = {} assignment_map = {}
checkpoint_names = None checkpoint_names = None
if var_to_shape_map: if var_to_shape_map:
checkpoint_names = list(filter( checkpoint_names = list(
lambda x: not x.endswith('Momentum') and not x.endswith( filter(
'global_step'), var_to_shape_map.keys())) lambda x: not x.endswith('Momentum') and not x.endswith(
'global_step'), var_to_shape_map.keys()))
for var in keras_model.variables: for var in keras_model.variables:
var_name = var.name var_name = var.name
...@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path): ...@@ -95,14 +98,15 @@ def _get_checkpoint_map(checkpoint_path):
def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None): def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
"""Returns scaffold function to restore parameters from v1 checkpoint. """Returns scaffold function to restore parameters from v1 checkpoint.
Args: Args:
checkpoint_path: path of the checkpoint folder or file. checkpoint_path: path of the checkpoint folder or file.
Example 1: '/path/to/model_dir/' Example 1: '/path/to/model_dir/'
Example 2: '/path/to/model.ckpt-22500' Example 2: '/path/to/model.ckpt-22500'
prefix: prefix in the variable name to be removed for alignment with names in prefix: prefix in the variable name to be removed for alignment with names in
the checkpoint. the checkpoint.
skip_regex: regular expression to match the names of variables that skip_regex: regular expression to match the names of variables that do not
do not need to be assigned. need to be assigned.
Returns: Returns:
Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model. Callable[tf.keras.Model] -> void. Fn to load v1 checkpoint to keras model.
...@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None): ...@@ -125,7 +129,6 @@ def make_restore_checkpoint_fn(checkpoint_path, prefix='', skip_regex=None):
var_to_shape_map=var_to_shape_map) var_to_shape_map=var_to_shape_map)
if not vars_to_load: if not vars_to_load:
raise ValueError('Variables to load is empty.') raise ValueError('Variables to load is empty.')
tf.compat.v1.train.init_from_checkpoint(checkpoint_path, tf.compat.v1.train.init_from_checkpoint(checkpoint_path, vars_to_load)
vars_to_load)
return _restore_checkpoint_fn return _restore_checkpoint_fn
...@@ -25,7 +25,8 @@ import tensorflow as tf ...@@ -25,7 +25,8 @@ import tensorflow as tf
from official.modeling.hyperparams import params_dict from official.modeling.hyperparams import params_dict
class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): class StepLearningRateWithLinearWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor.""" """Class to generate learning rate tensor."""
def __init__(self, total_steps, params): def __init__(self, total_steps, params):
...@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat ...@@ -57,7 +58,8 @@ class StepLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRat
return {'_params': self._params.as_dict()} return {'_params': self._params.as_dict()}
class CosineLearningRateWithLinearWarmup(tf.keras.optimizers.schedules.LearningRateSchedule): class CosineLearningRateWithLinearWarmup(
tf.keras.optimizers.schedules.LearningRateSchedule):
"""Class to generate learning rate tensor.""" """Class to generate learning rate tensor."""
def __init__(self, total_steps, params): def __init__(self, total_steps, params):
......
...@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model): ...@@ -118,9 +118,7 @@ class MaskrcnnModel(base_model.Model):
box_targets = tf.where( box_targets = tf.where(
tf.tile( tf.tile(
tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1), tf.expand_dims(tf.equal(matched_gt_classes, 0), axis=-1),
[1, 1, 4]), [1, 1, 4]), tf.zeros_like(box_targets), box_targets)
tf.zeros_like(box_targets),
box_targets)
model_outputs.update({ model_outputs.update({
'class_targets': matched_gt_classes, 'class_targets': matched_gt_classes,
'box_targets': box_targets, 'box_targets': box_targets,
...@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model): ...@@ -183,9 +181,7 @@ class MaskrcnnModel(base_model.Model):
mask_outputs), mask_outputs),
}) })
else: else:
model_outputs.update({ model_outputs.update({'detection_masks': tf.nn.sigmoid(mask_outputs)})
'detection_masks': tf.nn.sigmoid(mask_outputs)
})
return model_outputs return model_outputs
...@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model): ...@@ -312,8 +308,8 @@ class MaskrcnnModel(base_model.Model):
required_output_fields = ['class_outputs', 'box_outputs'] required_output_fields = ['class_outputs', 'box_outputs']
for field in required_output_fields: for field in required_output_fields:
if field not in outputs: if field not in outputs:
raise ValueError('"%s" is missing in outputs, requried %s found %s' raise ValueError('"%s" is missing in outputs, requried %s found %s' %
%(field, required_output_fields, outputs.keys())) (field, required_output_fields, outputs.keys()))
predictions = { predictions = {
'image_info': labels['image_info'], 'image_info': labels['image_info'],
'num_detections': outputs['num_detections'], 'num_detections': outputs['num_detections'],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment