Unverified Commit c127d527 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents 78657911 457bcb85
......@@ -44,6 +44,7 @@ DEEPMAC_PROTO_TEXT = """
box_consistency_loss_normalize: NORMALIZE_AUTO
color_consistency_warmup_steps: 20
color_consistency_warmup_start: 10
use_only_last_stage: false
"""
......@@ -117,10 +118,11 @@ def build_meta_arch(**override_params):
mask_size=16,
postprocess_crop_size=128,
max_roi_jitter_ratio=0.0,
roi_jitter_mode='random',
roi_jitter_mode='default',
color_consistency_dilation=2,
color_consistency_warmup_steps=0,
color_consistency_warmup_start=0)
color_consistency_warmup_start=0,
use_only_last_stage=True)
params.update(override_params)
......@@ -185,6 +187,7 @@ class DeepMACUtilsTest(tf.test.TestCase, parameterized.TestCase):
self.assertIsInstance(params, deepmac_meta_arch.DeepMACParams)
self.assertEqual(params.dim, 153)
self.assertEqual(params.box_consistency_loss_normalize, 'normalize_auto')
self.assertFalse(params.use_only_last_stage)
def test_subsample_trivial(self):
"""Test subsampling masks."""
......@@ -201,32 +204,71 @@ class DeepMACUtilsTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllClose(result[2], boxes)
self.assertAllClose(result[3], masks)
def test_filter_masked_classes(self):
classes = np.zeros((2, 3, 5), dtype=np.float32)
classes[0, 0] = [1.0, 0.0, 0.0, 0.0, 0.0]
classes[0, 1] = [0.0, 1.0, 0.0, 0.0, 0.0]
classes[0, 2] = [0.0, 0.0, 1.0, 0.0, 0.0]
classes[1, 0] = [0.0, 0.0, 0.0, 1.0, 0.0]
classes[1, 1] = [0.0, 0.0, 0.0, 0.0, 1.0]
classes[1, 2] = [0.0, 0.0, 0.0, 0.0, 1.0]
classes = tf.constant(classes)
weights = tf.constant([[1.0, 1.0, 1.0], [1.0, 1.0, 0.0]])
masks = tf.ones((2, 3, 32, 32), dtype=tf.float32)
classes, weights, masks = deepmac_meta_arch.filter_masked_classes(
[3, 4], classes, weights, masks)
expected_classes = np.zeros((2, 3, 5))
expected_classes[0, 0] = [0.0, 0.0, 0.0, 0.0, 0.0]
expected_classes[0, 1] = [0.0, 0.0, 0.0, 0.0, 0.0]
expected_classes[0, 2] = [0.0, 0.0, 1.0, 0.0, 0.0]
expected_classes[1, 0] = [0.0, 0.0, 0.0, 1.0, 0.0]
expected_classes[1, 1] = [0.0, 0.0, 0.0, 0.0, 0.0]
expected_classes[1, 2] = [0.0, 0.0, 0.0, 0.0, 0.0]
self.assertAllClose(expected_classes, classes.numpy())
self.assertAllClose(np.array(([0.0, 0.0, 1.0], [1.0, 0.0, 0.0])), weights)
self.assertAllClose(masks[0, 0], np.zeros((32, 32)))
self.assertAllClose(masks[0, 1], np.zeros((32, 32)))
self.assertAllClose(masks[0, 2], np.ones((32, 32)))
self.assertAllClose(masks[1, 0], np.ones((32, 32)))
self.assertAllClose(masks[1, 1], np.zeros((32, 32)))
def test_fill_boxes(self):
boxes = tf.constant([[0., 0., 0.5, 0.5], [0.5, 0.5, 1.0, 1.0]])
boxes = tf.constant([[[0., 0., 0.5, 0.5], [0.5, 0.5, 1.0, 1.0]],
[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]]])
filled_boxes = deepmac_meta_arch.fill_boxes(boxes, 32, 32)
expected = np.zeros((2, 32, 32))
expected[0, :17, :17] = 1.0
expected[1, 16:, 16:] = 1.0
expected = np.zeros((2, 2, 32, 32))
expected[0, 0, :17, :17] = 1.0
expected[0, 1, 16:, 16:] = 1.0
expected[1, 0, :, :] = 1.0
filled_boxes = filled_boxes.numpy()
self.assertAllClose(expected[0, 0], filled_boxes[0, 0], rtol=1e-3)
self.assertAllClose(expected[0, 1], filled_boxes[0, 1], rtol=1e-3)
self.assertAllClose(expected[1, 0], filled_boxes[1, 0], rtol=1e-3)
self.assertAllClose(expected, filled_boxes.numpy(), rtol=1e-3)
def test_flatten_and_unpack(self):
t = tf.random.uniform((2, 3, 4, 5, 6))
flatten = tf.function(deepmac_meta_arch.flatten_first2_dims)
unpack = tf.function(deepmac_meta_arch.unpack_first2_dims)
result, d1, d2 = flatten(t)
result = unpack(result, d1, d2)
self.assertAllClose(result.numpy(), t)
def test_crop_and_resize_instance_masks(self):
boxes = tf.zeros((5, 4))
masks = tf.zeros((5, 128, 128))
boxes = tf.zeros((8, 5, 4))
masks = tf.zeros((8, 5, 128, 128))
output = deepmac_meta_arch.crop_and_resize_instance_masks(
masks, boxes, 32)
self.assertEqual(output.shape, (5, 32, 32))
def test_crop_and_resize_feature_map(self):
boxes = tf.zeros((5, 4))
features = tf.zeros((128, 128, 7))
output = deepmac_meta_arch.crop_and_resize_feature_map(
features, boxes, 32)
self.assertEqual(output.shape, (5, 32, 32, 7))
self.assertEqual(output.shape, (8, 5, 32, 32))
def test_embedding_projection_prob_shape(self):
dist = deepmac_meta_arch.embedding_projection(
......@@ -262,73 +304,75 @@ class DeepMACUtilsTest(tf.test.TestCase, parameterized.TestCase):
def test_generate_2d_neighbors_shape(self):
inp = tf.zeros((13, 14, 3))
inp = tf.zeros((5, 13, 14, 3))
out = deepmac_meta_arch.generate_2d_neighbors(inp)
self.assertEqual((8, 13, 14, 3), out.shape)
self.assertEqual((8, 5, 13, 14, 3), out.shape)
def test_generate_2d_neighbors(self):
inp = np.arange(16).reshape(4, 4).astype(np.float32)
inp = tf.stack([inp, inp * 2], axis=2)
inp = tf.reshape(inp, (1, 4, 4, 2))
out = deepmac_meta_arch.generate_2d_neighbors(inp, dilation=1)
self.assertEqual((8, 4, 4, 2), out.shape)
self.assertEqual((8, 1, 4, 4, 2), out.shape)
for i in range(2):
expected = np.array([0, 1, 2, 4, 6, 8, 9, 10]) * (i + 1)
self.assertAllEqual(out[:, 1, 1, i], expected)
self.assertAllEqual(out[:, 0, 1, 1, i], expected)
expected = np.array([1, 2, 3, 5, 7, 9, 10, 11]) * (i + 1)
self.assertAllEqual(out[:, 1, 2, i], expected)
self.assertAllEqual(out[:, 0, 1, 2, i], expected)
expected = np.array([4, 5, 6, 8, 10, 12, 13, 14]) * (i + 1)
self.assertAllEqual(out[:, 2, 1, i], expected)
self.assertAllEqual(out[:, 0, 2, 1, i], expected)
expected = np.array([5, 6, 7, 9, 11, 13, 14, 15]) * (i + 1)
self.assertAllEqual(out[:, 2, 2, i], expected)
self.assertAllEqual(out[:, 0, 2, 2, i], expected)
def test_generate_2d_neighbors_dilation2(self):
inp = np.arange(16).reshape(4, 4, 1).astype(np.float32)
inp = np.arange(16).reshape(1, 4, 4, 1).astype(np.float32)
out = deepmac_meta_arch.generate_2d_neighbors(inp, dilation=2)
self.assertEqual((8, 4, 4, 1), out.shape)
self.assertEqual((8, 1, 4, 4, 1), out.shape)
expected = np.array([0, 0, 0, 0, 2, 0, 8, 10])
self.assertAllEqual(out[:, 0, 0, 0], expected)
self.assertAllEqual(out[:, 0, 0, 0, 0], expected)
def test_dilated_similarity_shape(self):
fmap = tf.zeros((32, 32, 9))
fmap = tf.zeros((5, 32, 32, 9))
similarity = deepmac_meta_arch.dilated_cross_pixel_similarity(
fmap)
self.assertEqual((8, 32, 32), similarity.shape)
self.assertEqual((8, 5, 32, 32), similarity.shape)
def test_dilated_similarity(self):
fmap = np.zeros((5, 5, 2), dtype=np.float32)
fmap = np.zeros((1, 5, 5, 2), dtype=np.float32)
fmap[0, 0, :] = 1.0
fmap[4, 4, :] = 1.0
fmap[0, 0, 0, :] = 1.0
fmap[0, 4, 4, :] = 1.0
similarity = deepmac_meta_arch.dilated_cross_pixel_similarity(
fmap, theta=1.0, dilation=2)
self.assertAlmostEqual(similarity.numpy()[0, 2, 2],
self.assertAlmostEqual(similarity.numpy()[0, 0, 2, 2],
np.exp(-np.sqrt(2)))
def test_dilated_same_instance_mask_shape(self):
instances = tf.zeros((5, 32, 32))
instances = tf.zeros((2, 5, 32, 32))
output = deepmac_meta_arch.dilated_cross_same_mask_label(instances)
self.assertEqual((8, 5, 32, 32), output.shape)
self.assertEqual((8, 2, 5, 32, 32), output.shape)
def test_dilated_same_instance_mask(self):
instances = np.zeros((3, 2, 5, 5), dtype=np.float32)
instances[0, 0, 0, 0] = 1.0
instances[0, 0, 2, 2] = 1.0
instances[0, 0, 4, 4] = 1.0
instances[2, 0, 0, 0] = 1.0
instances[2, 0, 2, 2] = 1.0
instances[2, 0, 4, 4] = 0.0
instances = np.zeros((2, 5, 5), dtype=np.float32)
instances[0, 0, 0] = 1.0
instances[0, 2, 2] = 1.0
instances[0, 4, 4] = 1.0
output = deepmac_meta_arch.dilated_cross_same_mask_label(instances).numpy()
self.assertAllClose(np.ones((8, 5, 5)), output[:, 1, :, :])
self.assertAllClose([1, 0, 0, 0, 0, 0, 0, 1], output[:, 0, 2, 2])
self.assertAllClose(np.ones((8, 2, 5, 5)), output[:, 1, :, :])
self.assertAllClose([1, 0, 0, 0, 0, 0, 0, 1], output[:, 0, 0, 2, 2])
self.assertAllClose([1, 0, 0, 0, 0, 0, 0, 0], output[:, 2, 0, 2, 2])
def test_per_pixel_single_conv_multiple_instance(self):
......@@ -550,151 +594,184 @@ class DeepMACMaskHeadTest(tf.test.TestCase, parameterized.TestCase):
@unittest.skipIf(tf_version.is_tf1(), 'Skipping TF2.X only test.')
class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
# TODO(vighneshb): Add batch_size > 1 tests for loss functions.
def setUp(self): # pylint:disable=g-missing-super-call
self.model = build_meta_arch()
def test_get_mask_head_input(self):
boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]],
boxes = tf.constant([[[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]],
[[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]]],
dtype=tf.float32)
pixel_embedding = np.zeros((32, 32, 4), dtype=np.float32)
pixel_embedding[:16, :16] = 1.0
pixel_embedding[16:, 16:] = 2.0
pixel_embedding = np.zeros((2, 32, 32, 4), dtype=np.float32)
pixel_embedding[0, :16, :16] = 1.0
pixel_embedding[0, 16:, 16:] = 2.0
pixel_embedding[1, :16, :16] = 3.0
pixel_embedding[1, 16:, 16:] = 4.0
pixel_embedding = tf.constant(pixel_embedding)
mask_inputs = self.model._get_mask_head_input(boxes, pixel_embedding)
self.assertEqual(mask_inputs.shape, (2, 16, 16, 6))
self.assertEqual(mask_inputs.shape, (2, 2, 16, 16, 6))
y_grid, x_grid = tf.meshgrid(np.linspace(-1.0, 1.0, 16),
np.linspace(-1.0, 1.0, 16), indexing='ij')
for i in range(2):
mask_input = mask_inputs[i]
self.assertAllClose(y_grid, mask_input[:, :, 0])
self.assertAllClose(x_grid, mask_input[:, :, 1])
pixel_embedding = mask_input[:, :, 2:]
self.assertAllClose(np.zeros((16, 16, 4)) + i + 1, pixel_embedding)
for i, j in ([0, 0], [0, 1], [1, 0], [1, 1]):
self.assertAllClose(y_grid, mask_inputs[i, j, :, :, 0])
self.assertAllClose(x_grid, mask_inputs[i, j, :, :, 1])
zeros = np.zeros((16, 16, 4))
self.assertAllClose(zeros + 1, mask_inputs[0, 0, :, :, 2:])
self.assertAllClose(zeros + 2, mask_inputs[0, 1, :, :, 2:])
self.assertAllClose(zeros + 3, mask_inputs[1, 0, :, :, 2:])
self.assertAllClose(zeros + 4, mask_inputs[1, 1, :, :, 2:])
def test_get_mask_head_input_no_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
boxes = tf.constant([[0., 0., 1.0, 1.0], [0.0, 0.0, 0.5, 1.0]],
dtype=tf.float32)
boxes = tf.constant([[[0., 0., 1.0, 1.0], [0.0, 0.0, 0.5, 1.0]],
[[0.5, 0.5, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0]]])
pixel_embedding_np = np.random.randn(32, 32, 4).astype(np.float32)
pixel_embedding_np = np.random.randn(2, 32, 32, 4).astype(np.float32)
pixel_embedding = tf.constant(pixel_embedding_np)
mask_inputs = model._get_mask_head_input(boxes, pixel_embedding)
self.assertEqual(mask_inputs.shape, (2, 32, 32, 6))
self.assertEqual(mask_inputs.shape, (2, 2, 32, 32, 6))
y_grid, x_grid = tf.meshgrid(np.linspace(.0, 1.0, 32),
np.linspace(.0, 1.0, 32), indexing='ij')
ys = [0.5, 0.25]
xs = [0.5, 0.5]
for i in range(2):
mask_input = mask_inputs[i]
self.assertAllClose(y_grid - ys[i], mask_input[:, :, 0])
self.assertAllClose(x_grid - xs[i], mask_input[:, :, 1])
pixel_embedding = mask_input[:, :, 2:]
self.assertAllClose(pixel_embedding_np, pixel_embedding)
self.assertAllClose(y_grid - 0.5, mask_inputs[0, 0, :, :, 0])
self.assertAllClose(x_grid - 0.5, mask_inputs[0, 0, :, :, 1])
self.assertAllClose(y_grid - 0.25, mask_inputs[0, 1, :, :, 0])
self.assertAllClose(x_grid - 0.5, mask_inputs[0, 1, :, :, 1])
self.assertAllClose(y_grid - 0.75, mask_inputs[1, 0, :, :, 0])
self.assertAllClose(x_grid - 0.75, mask_inputs[1, 0, :, :, 1])
self.assertAllClose(y_grid, mask_inputs[1, 1, :, :, 0])
self.assertAllClose(x_grid, mask_inputs[1, 1, :, :, 1])
def test_get_instance_embeddings(self):
embeddings = np.zeros((32, 32, 2))
embeddings[8, 8] = 1.0
embeddings[24, 16] = 2.0
embeddings = np.zeros((2, 32, 32, 2))
embeddings[0, 8, 8] = 1.0
embeddings[0, 24, 16] = 2.0
embeddings[1, 8, 16] = 3.0
embeddings = tf.constant(embeddings)
boxes = tf.constant([[0., 0., 0.5, 0.5], [0.5, 0.0, 1.0, 1.0]])
boxes = np.zeros((2, 2, 4), dtype=np.float32)
boxes[0, 0] = [0.0, 0.0, 0.5, 0.5]
boxes[0, 1] = [0.5, 0.0, 1.0, 1.0]
boxes[1, 0] = [0.0, 0.0, 0.5, 1.0]
boxes = tf.constant(boxes)
center_embeddings = self.model._get_instance_embeddings(boxes, embeddings)
self.assertAllClose(center_embeddings, [[1.0, 1.0], [2.0, 2.0]])
self.assertAllClose(center_embeddings[0, 0], [1.0, 1.0])
self.assertAllClose(center_embeddings[0, 1], [2.0, 2.0])
self.assertAllClose(center_embeddings[1, 0], [3.0, 3.0])
def test_get_groundtruth_mask_output(self):
boxes = tf.constant([[0., 0., 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]],
dtype=tf.float32)
masks = np.zeros((2, 32, 32), dtype=np.float32)
masks[0, :16, :16] = 0.5
masks[1, 16:, 16:] = 0.1
boxes = np.zeros((2, 2, 4))
masks = np.zeros((2, 2, 32, 32))
boxes[0, 0] = [0.0, 0.0, 0.25, 0.25]
boxes[0, 1] = [0.75, 0.75, 1.0, 1.0]
boxes[1, 0] = [0.0, 0.0, 0.5, 1.0]
masks = np.zeros((2, 2, 32, 32), dtype=np.float32)
masks[0, 0, :16, :16] = 0.5
masks[0, 1, 16:, 16:] = 0.1
masks[1, 0, :17, :] = 0.3
masks = self.model._get_groundtruth_mask_output(boxes, masks)
self.assertEqual(masks.shape, (2, 16, 16))
self.assertEqual(masks.shape, (2, 2, 16, 16))
self.assertAllClose(masks[0], np.zeros((16, 16)) + 0.5)
self.assertAllClose(masks[1], np.zeros((16, 16)) + 0.1)
self.assertAllClose(masks[0, 0], np.zeros((16, 16)) + 0.5)
self.assertAllClose(masks[0, 1], np.zeros((16, 16)) + 0.1)
self.assertAllClose(masks[1, 0], np.zeros((16, 16)) + 0.3)
def test_get_groundtruth_mask_output_crop_resize(self):
def test_get_groundtruth_mask_output_no_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
boxes = tf.constant([[0., 0., 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
dtype=tf.float32)
masks = tf.ones((2, 32, 32))
boxes = tf.zeros((2, 5, 4))
masks = tf.ones((2, 5, 32, 32))
masks = model._get_groundtruth_mask_output(boxes, masks)
self.assertAllClose(masks, np.ones((2, 32, 32)))
self.assertAllClose(masks, np.ones((2, 5, 32, 32)))
def test_per_instance_loss(self):
def test_predict(self):
model = build_meta_arch()
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]])
masks = np.zeros((2, 32, 32), dtype=np.float32)
masks[0, :16, :16] = 1.0
masks[1, 16:, 16:] = 1.0
masks = tf.constant(masks)
tf.keras.backend.set_learning_phase(True)
self.model.provide_groundtruth(
groundtruth_boxes_list=[tf.convert_to_tensor([[0., 0., 1., 1.]] * 5)],
groundtruth_classes_list=[tf.one_hot([1, 0, 1, 1, 1], depth=6)],
groundtruth_weights_list=[tf.ones(5)],
groundtruth_masks_list=[tf.ones((5, 32, 32))])
prediction = self.model.predict(tf.zeros((1, 32, 32, 3)), None)
self.assertEqual(prediction['MASK_LOGITS_GT_BOXES'][0].shape,
(1, 5, 16, 16))
def test_loss(self):
loss_dict = model._compute_per_instance_deepmac_losses(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
tf.zeros((16, 16, 3)))
model = build_meta_arch()
boxes = tf.constant([[[0.0, 0.0, 0.25, 0.25], [0.75, 0.75, 1.0, 1.0]]])
masks = np.zeros((1, 2, 32, 32), dtype=np.float32)
masks[0, 0, :16, :16] = 1.0
masks[0, 1, 16:, 16:] = 1.0
masks_pred = tf.fill((1, 2, 32, 32), 0.9)
loss_dict = model._compute_deepmac_losses(
boxes, masks_pred, masks, tf.zeros((1, 16, 16, 3)))
self.assertAllClose(
loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
np.zeros((1, 2)) - tf.math.log(tf.nn.sigmoid(0.9)))
def test_per_instance_loss_no_crop_resize(self):
def test_loss_no_crop_resize(self):
model = build_meta_arch(predict_full_resolution_masks=True)
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
masks = np.ones((2, 128, 128), dtype=np.float32)
masks = tf.constant(masks)
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
masks = tf.ones((1, 2, 128, 128), dtype=tf.float32)
masks_pred = tf.fill((1, 2, 32, 32), 0.9)
loss_dict = model._compute_per_instance_deepmac_losses(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
tf.zeros((32, 32, 3)))
loss_dict = model._compute_deepmac_losses(
boxes, masks_pred, masks, tf.zeros((1, 32, 32, 3)))
self.assertAllClose(
loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
np.zeros(2) - tf.math.log(tf.nn.sigmoid(0.9)))
np.zeros((1, 2)) - tf.math.log(tf.nn.sigmoid(0.9)))
def test_per_instance_loss_no_crop_resize_dice(self):
def test_loss_no_crop_resize_dice(self):
model = build_meta_arch(predict_full_resolution_masks=True,
use_dice_loss=True)
model._mask_net = MockMaskNet()
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
masks = np.ones((2, 128, 128), dtype=np.float32)
boxes = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
masks = np.ones((1, 2, 128, 128), dtype=np.float32)
masks = tf.constant(masks)
masks_pred = tf.fill((1, 2, 32, 32), 0.9)
loss_dict = model._compute_per_instance_deepmac_losses(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
tf.zeros((32, 32, 3)))
loss_dict = model._compute_deepmac_losses(
boxes, masks_pred, masks, tf.zeros((1, 32, 32, 3)))
pred = tf.nn.sigmoid(0.9)
expected = (1.0 - ((2.0 * pred) / (1.0 + pred)))
self.assertAllClose(loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION],
[expected, expected], rtol=1e-3)
[[expected, expected]], rtol=1e-3)
def test_empty_masks(self):
boxes = tf.zeros([0, 4])
masks = tf.zeros([0, 128, 128])
loss_dict = self.model._compute_per_instance_deepmac_losses(
boxes, masks, tf.zeros((32, 32, 2)), tf.zeros((32, 32, 2)),
tf.zeros((16, 16, 3)))
boxes = tf.zeros([1, 0, 4])
masks = tf.zeros([1, 0, 128, 128])
loss_dict = self.model._compute_deepmac_losses(
boxes, masks, masks,
tf.zeros((1, 16, 16, 3)))
self.assertEqual(loss_dict[deepmac_meta_arch.DEEP_MASK_ESTIMATION].shape,
(0,))
(1, 0))
def test_postprocess(self):
model = build_meta_arch()
model._mask_net = MockMaskNet()
boxes = np.zeros((2, 3, 4), dtype=np.float32)
......@@ -708,7 +785,6 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllClose(masks, prob * np.ones((2, 3, 16, 16)))
def test_postprocess_emb_proj(self):
model = build_meta_arch(network_type='embedding_projection',
use_instance_embedding=False,
use_xy=False, pixel_embedding_dim=8,
......@@ -724,7 +800,6 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
self.assertEqual(masks.shape, (2, 3, 16, 16))
def test_postprocess_emb_proj_fullres(self):
model = build_meta_arch(network_type='embedding_projection',
predict_full_resolution_masks=True,
use_instance_embedding=False,
......@@ -751,17 +826,6 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
prob = tf.nn.sigmoid(0.9).numpy()
self.assertAllClose(masks, prob * np.ones((2, 3, 128, 128)))
def test_crop_masks_within_boxes(self):
masks = np.zeros((2, 32, 32))
masks[0, :16, :16] = 1.0
masks[1, 16:, 16:] = 1.0
boxes = tf.constant([[0.0, 0.0, 15.0 / 32, 15.0 / 32],
[0.5, 0.5, 1.0, 1]])
masks = deepmac_meta_arch.crop_masks_within_boxes(
masks, boxes, 128)
masks = (masks.numpy() > 0.0).astype(np.float32)
self.assertAlmostEqual(masks.sum(), 2 * 128 * 128)
def test_transform_boxes_to_feature_coordinates(self):
batch_size = 2
model = build_meta_arch()
......@@ -816,13 +880,13 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
def test_box_consistency_loss(self):
boxes_gt = tf.constant([[0., 0., 0.49, 1.0]])
boxes_jittered = tf.constant([[0.0, 0.0, 1.0, 1.0]])
boxes_gt = tf.constant([[[0., 0., 0.49, 1.0]]])
boxes_jittered = tf.constant([[[0.0, 0.0, 1.0, 1.0]]])
mask_prediction = np.zeros((1, 32, 32)).astype(np.float32)
mask_prediction[0, :24, :24] = 1.0
mask_prediction = np.zeros((1, 1, 32, 32)).astype(np.float32)
mask_prediction[0, 0, :24, :24] = 1.0
loss = self.model._compute_per_instance_box_consistency_loss(
loss = self.model._compute_box_consistency_loss(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
yloss = tf.nn.sigmoid_cross_entropy_with_logits(
......@@ -834,39 +898,39 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
yloss_mean = tf.reduce_mean(yloss)
xloss_mean = tf.reduce_mean(xloss)
self.assertAllClose(loss, [yloss_mean + xloss_mean])
self.assertAllClose(loss[0], [yloss_mean + xloss_mean])
def test_box_consistency_loss_with_tightness(self):
boxes_gt = tf.constant([[0., 0., 0.49, 0.49]])
boxes_gt = tf.constant([[[0., 0., 0.49, 0.49]]])
boxes_jittered = None
mask_prediction = np.zeros((1, 8, 8)).astype(np.float32) - 1e10
mask_prediction[0, :4, :4] = 1e10
mask_prediction = np.zeros((1, 1, 8, 8)).astype(np.float32) - 1e10
mask_prediction[0, 0, :4, :4] = 1e10
model = build_meta_arch(box_consistency_tightness=True,
predict_full_resolution_masks=True)
loss = model._compute_per_instance_box_consistency_loss(
loss = model._compute_box_consistency_loss(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
self.assertAllClose(loss, [0.0])
self.assertAllClose(loss[0], [0.0])
def test_box_consistency_loss_gt_count(self):
boxes_gt = tf.constant([
boxes_gt = tf.constant([[
[0., 0., 1.0, 1.0],
[0., 0., 0.49, 0.49]])
[0., 0., 0.49, 0.49]]])
boxes_jittered = None
mask_prediction = np.zeros((2, 32, 32)).astype(np.float32)
mask_prediction[0, :16, :16] = 1.0
mask_prediction[1, :8, :8] = 1.0
mask_prediction = np.zeros((1, 2, 32, 32)).astype(np.float32)
mask_prediction[0, 0, :16, :16] = 1.0
mask_prediction[0, 1, :8, :8] = 1.0
model = build_meta_arch(
box_consistency_loss_normalize='normalize_groundtruth_count',
predict_full_resolution_masks=True)
loss_func = tf.function(
model._compute_per_instance_box_consistency_loss)
loss_func = (
model._compute_box_consistency_loss)
loss = loss_func(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
......@@ -877,7 +941,7 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
xloss = yloss
xloss_mean = tf.reduce_sum(xloss)
self.assertAllClose(loss[0], yloss_mean + xloss_mean)
self.assertAllClose(loss[0, 0], yloss_mean + xloss_mean)
yloss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.constant([1.0] * 16 + [0.0] * 16),
......@@ -885,21 +949,20 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
yloss_mean = tf.reduce_sum(yloss)
xloss = yloss
xloss_mean = tf.reduce_sum(xloss)
self.assertAllClose(loss[1], yloss_mean + xloss_mean)
self.assertAllClose(loss[0, 1], yloss_mean + xloss_mean)
def test_box_consistency_loss_balanced(self):
boxes_gt = tf.constant([
[0., 0., 0.49, 0.49]])
boxes_gt = tf.constant([[
[0., 0., 0.49, 0.49]]])
boxes_jittered = None
mask_prediction = np.zeros((1, 32, 32)).astype(np.float32)
mask_prediction[0] = 1.0
mask_prediction = np.zeros((1, 1, 32, 32)).astype(np.float32)
mask_prediction[0, 0] = 1.0
model = build_meta_arch(box_consistency_loss_normalize='normalize_balanced',
predict_full_resolution_masks=True)
loss_func = tf.function(
model._compute_per_instance_box_consistency_loss)
model._compute_box_consistency_loss)
loss = loss_func(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
......@@ -909,63 +972,64 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
yloss_mean = tf.reduce_sum(yloss) / 16.0
xloss_mean = yloss_mean
self.assertAllClose(loss[0], yloss_mean + xloss_mean)
self.assertAllClose(loss[0, 0], yloss_mean + xloss_mean)
def test_box_consistency_dice_loss(self):
model = build_meta_arch(use_dice_loss=True)
boxes_gt = tf.constant([[0., 0., 0.49, 1.0]])
boxes_jittered = tf.constant([[0.0, 0.0, 1.0, 1.0]])
boxes_gt = tf.constant([[[0., 0., 0.49, 1.0]]])
boxes_jittered = tf.constant([[[0.0, 0.0, 1.0, 1.0]]])
almost_inf = 1e10
mask_prediction = np.full((1, 32, 32), -almost_inf, dtype=np.float32)
mask_prediction[0, :24, :24] = almost_inf
mask_prediction = np.full((1, 1, 32, 32), -almost_inf, dtype=np.float32)
mask_prediction[0, 0, :24, :24] = almost_inf
loss = model._compute_per_instance_box_consistency_loss(
loss = model._compute_box_consistency_loss(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
yloss = 1 - 6.0 / 7
xloss = 0.2
self.assertAllClose(loss, [yloss + xloss])
self.assertAllClose(loss, [[yloss + xloss]])
def test_color_consistency_loss_full_res_shape(self):
model = build_meta_arch(use_dice_loss=True,
predict_full_resolution_masks=True)
boxes = tf.zeros((3, 4))
img = tf.zeros((32, 32, 3))
mask_logits = tf.zeros((3, 32, 32))
boxes = tf.zeros((5, 3, 4))
img = tf.zeros((5, 32, 32, 3))
mask_logits = tf.zeros((5, 3, 32, 32))
loss = model._compute_per_instance_color_consistency_loss(
loss = model._compute_color_consistency_loss(
boxes, img, mask_logits)
self.assertEqual([3], loss.shape)
self.assertEqual([5, 3], loss.shape)
def test_color_consistency_1_threshold(self):
model = build_meta_arch(predict_full_resolution_masks=True,
color_consistency_threshold=0.99)
boxes = tf.zeros((3, 4))
img = tf.zeros((32, 32, 3))
mask_logits = tf.zeros((3, 32, 32)) - 1e4
boxes = tf.zeros((5, 3, 4))
img = tf.zeros((5, 32, 32, 3))
mask_logits = tf.zeros((5, 3, 32, 32)) - 1e4
loss = model._compute_per_instance_color_consistency_loss(
loss = model._compute_color_consistency_loss(
boxes, img, mask_logits)
self.assertAllClose(loss, np.zeros(3))
self.assertAllClose(loss, np.zeros((5, 3)))
def test_box_consistency_dice_loss_full_res(self):
model = build_meta_arch(use_dice_loss=True,
predict_full_resolution_masks=True)
boxes_gt = tf.constant([[0., 0., 1.0, 1.0]])
boxes_gt = tf.constant([[[0., 0., 1.0, 1.0]]])
boxes_jittered = None
size = 32
almost_inf = 1e10
mask_prediction = np.full((1, 32, 32), -almost_inf, dtype=np.float32)
mask_prediction[0, :16, :32] = almost_inf
mask_prediction = np.full((1, 1, size, size), -almost_inf, dtype=np.float32)
mask_prediction[0, 0, :(size // 2), :] = almost_inf
loss = model._compute_per_instance_box_consistency_loss(
loss = model._compute_box_consistency_loss(
boxes_gt, boxes_jittered, tf.constant(mask_prediction))
self.assertAlmostEqual(loss[0].numpy(), 1 / 3)
self.assertAlmostEqual(loss[0, 0].numpy(), 1 / 3)
def test_get_lab_image_shape(self):
......@@ -975,18 +1039,18 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
def test_loss_keys(self):
model = build_meta_arch(use_dice_loss=True)
prediction = {
'preprocessed_inputs': tf.random.normal((1, 32, 32, 3)),
'INSTANCE_EMBEDDING': [tf.random.normal((1, 8, 8, 17))] * 2,
'PIXEL_EMBEDDING': [tf.random.normal((1, 8, 8, 19))] * 2,
'object_center': [tf.random.normal((1, 8, 8, 6))] * 2,
'box/offset': [tf.random.normal((1, 8, 8, 2))] * 2,
'box/scale': [tf.random.normal((1, 8, 8, 2))] * 2
'preprocessed_inputs': tf.random.normal((3, 32, 32, 3)),
'MASK_LOGITS_GT_BOXES': [tf.random.normal((3, 5, 8, 8))] * 2,
'object_center': [tf.random.normal((3, 8, 8, 6))] * 2,
'box/offset': [tf.random.normal((3, 8, 8, 2))] * 2,
'box/scale': [tf.random.normal((3, 8, 8, 2))] * 2
}
model.provide_groundtruth(
groundtruth_boxes_list=[tf.convert_to_tensor([[0., 0., 1., 1.]] * 5)],
groundtruth_classes_list=[tf.one_hot([1, 0, 1, 1, 1], depth=6)],
groundtruth_weights_list=[tf.ones(5)],
groundtruth_masks_list=[tf.ones((5, 32, 32))])
groundtruth_boxes_list=[
tf.convert_to_tensor([[0., 0., 1., 1.]] * 5)] * 3,
groundtruth_classes_list=[tf.one_hot([1, 0, 1, 1, 1], depth=6)] * 3,
groundtruth_weights_list=[tf.ones(5)] * 3,
groundtruth_masks_list=[tf.ones((5, 32, 32))] * 3)
loss = model.loss(prediction, tf.constant([[32, 32, 3.0]]))
self.assertGreater(loss['Loss/deep_mask_estimation'], 0.0)
......@@ -1008,8 +1072,7 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
num_stages = 1
prediction = {
'preprocessed_inputs': tf.random.normal((1, 32, 32, 3)),
'INSTANCE_EMBEDDING': [tf.random.normal((1, 8, 8, 9))] * num_stages,
'PIXEL_EMBEDDING': [tf.random.normal((1, 8, 8, 8))] * num_stages,
'MASK_LOGITS_GT_BOXES': [tf.random.normal((1, 5, 8, 8))] * num_stages,
'object_center': [tf.random.normal((1, 8, 8, 6))] * num_stages,
'box/offset': [tf.random.normal((1, 8, 8, 2))] * num_stages,
'box/scale': [tf.random.normal((1, 8, 8, 2))] * num_stages
......@@ -1066,6 +1129,7 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
f'{mask_loss} did not respond to change in weight.')
def test_color_consistency_warmup(self):
tf.keras.backend.set_learning_phase(True)
model = build_meta_arch(
use_dice_loss=True,
predict_full_resolution_masks=True,
......@@ -1079,8 +1143,7 @@ class DeepMACMetaArchTest(tf.test.TestCase, parameterized.TestCase):
num_stages = 1
prediction = {
'preprocessed_inputs': tf.random.normal((1, 32, 32, 3)),
'INSTANCE_EMBEDDING': [tf.random.normal((1, 8, 8, 9))] * num_stages,
'PIXEL_EMBEDDING': [tf.random.normal((1, 8, 8, 8))] * num_stages,
'MASK_LOGITS_GT_BOXES': [tf.random.normal((1, 5, 8, 8))] * num_stages,
'object_center': [tf.random.normal((1, 8, 8, 6))] * num_stages,
'box/offset': [tf.random.normal((1, 8, 8, 2))] * num_stages,
'box/scale': [tf.random.normal((1, 8, 8, 2))] * num_stages
......
......@@ -403,7 +403,7 @@ message CenterNet {
// Mask prediction support using DeepMAC. See https://arxiv.org/abs/2104.00613
// Next ID 24
// Next ID 25
message DeepMACMaskEstimation {
// The loss used for penalizing mask predictions.
optional ClassificationLoss classification_loss = 1;
......@@ -485,6 +485,14 @@ message CenterNet {
optional int32 color_consistency_warmup_start = 23 [default=0];
// DeepMAC has been refactored to process the entire batch at once
// instead of the previous (simpler) approach of processing one sample
// at a time. This increases memory consumption, so it is important to
// feed the mask head only the last-stage outputs from the hourglass
// backbone. Doing so halves the memory requirement of the mask head
// and does not cause a drop in evaluation metrics.
optional bool use_only_last_stage = 24 [default=false];
}
optional DeepMACMaskEstimation deepmac_mask_estimation = 14;
......
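# Example (sketch, not part of this diff): parsing a DeepMACMaskEstimation
# message with the new `use_only_last_stage` field enabled. The
# `center_net_pb2` import path is an assumption based on the TF Object
# Detection API layout.
from google.protobuf import text_format
from object_detection.protos import center_net_pb2  # assumed module path

deepmac_config = text_format.Parse(
    """
    color_consistency_warmup_steps: 20
    use_only_last_stage: true
    """,
    center_net_pb2.CenterNet.DeepMACMaskEstimation())
assert deepmac_config.use_only_last_stage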
......@@ -76,3 +76,29 @@ py_strict_library(
"//tf_ops:sequence_string_projection_op_v2_py", # sequence projection
],
)
py_strict_library(
name = "misc_layers",
srcs = ["misc_layers.py"],
srcs_version = "PY3",
deps = [
# package tensorflow
"//layers:base_layers", # sequence projection
"//layers:dense_layers", # sequence projection
"//layers:quantization_layers", # sequence projection
],
)
py_strict_library(
name = "qrnn_layers",
srcs = ["qrnn_layers.py"],
srcs_version = "PY3",
deps = [
":base_layers",
":conv_layers",
":dense_layers",
":quantization_layers",
# package tensorflow
"//tf_ops:tf_custom_ops_py", # sequence projection
],
)
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Layers for embedding."""
import tensorflow as tf
from layers import base_layers # import seq_flow_lite module
from layers import dense_layers # import seq_flow_lite module
from layers import quantization_layers # import seq_flow_lite module
class AttentionPooling(base_layers.BaseLayer):
"""A basic attention pooling layer."""
def __init__(self, scalar=True, **kwargs):
self.scalar = scalar
# Attention logits should not have an activation after the linear layer,
# so they can be positive or negative; this lets the attention
# distribution take whatever shape the network prefers. A relu
# activation would bias the attention distribution towards uniform.
# Leaving the logits unconstrained gives better results for attention
# pooling: although some outputs are emphasized for the classification
# decision, all other outputs keep a non-zero probability of influencing
# the class, which seems to result in better backprop.
self.attention = dense_layers.BaseQDenseVarLen(units=1, rank=3, **kwargs)
self.qactivation = quantization_layers.ActivationQuantization(**kwargs)
super(AttentionPooling, self).__init__(**kwargs)
def build(self, input_shapes):
self.feature_size = input_shapes[-1]
def call(self, inputs, mask, inverse_normalizer):
self._assert_rank_and_type(inputs, 3)
self._assert_rank_and_type(mask, 3)
batch_size = self.get_batch_dimension(inputs)
attn_logits = self.attention(inputs, mask, inverse_normalizer)
if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
invalid_mask = (1 - mask) * self.parameters.invalid_logit
attn_logits = attn_logits * mask + invalid_mask
attn_logits = tf.reshape(attn_logits, [batch_size, -1])
attention = tf.nn.softmax(attn_logits, axis=-1)
attention = self.qrange_sigmoid(attention, tf_only=True)
if self.parameters.mode in [base_layers.PREDICT, base_layers.TFLITE]:
inputs = tf.reshape(inputs, [-1, self.feature_size])
else:
attention = tf.expand_dims(attention, axis=1)
pre_logits = self.qactivation(tf.matmul(attention, inputs))
return tf.reshape(pre_logits, [batch_size, self.feature_size])
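# Example (sketch, not part of this change): the masked-softmax pooling that
# AttentionPooling implements, written in plain TF with an unquantized Dense
# layer standing in for the quantized BaseQDenseVarLen attention layer.
def _attention_pooling_sketch():
  """Illustrative only; assumes [batch, time, features] float inputs."""
  inputs = tf.random.normal([2, 7, 16])
  mask = tf.sequence_mask([5, 7], 7, dtype=tf.float32)            # [batch, time]
  attn_logits = tf.squeeze(tf.keras.layers.Dense(1)(inputs), -1)  # no activation
  attn_logits = attn_logits * mask + (1.0 - mask) * -1e9          # mask padding
  attention = tf.nn.softmax(attn_logits, axis=-1)
  return tf.matmul(attention[:, None, :], inputs)[:, 0, :]        # [batch, features]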
class TreeInductionLayer(base_layers.BaseLayer):
"""A basic tree induction layer."""
def __init__(self, **kwargs):
self.qactivation = quantization_layers.ActivationQuantization(**kwargs)
super(TreeInductionLayer, self).__init__(**kwargs)
def call(self, keys, queries, sequence_length):
key_dim = keys.get_shape().as_list()[-1]
query_dim = queries.get_shape().as_list()[-1]
assert key_dim == query_dim, "Last dimension of keys/queries should match."
if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
sequence_mask = tf.sequence_mask(
sequence_length, maxlen=tf.shape(keys)[1], dtype=tf.float32)
sequence_mask = tf.expand_dims(sequence_mask, axis=2)
attn_mask = tf.matmul(sequence_mask, sequence_mask, transpose_b=True)
attn_logits = self.qactivation(tf.matmul(keys, queries, transpose_b=True))
invalid_attn_mask = (1 - attn_mask) * self.parameters.invalid_logit
return attn_logits * attn_mask + invalid_attn_mask
else:
assert self.get_batch_dimension(keys) == 1
assert self.get_batch_dimension(queries) == 1
keys = tf.reshape(keys, [-1, key_dim])
queries = tf.reshape(queries, [-1, key_dim])
result = self.qactivation(tf.matmul(keys, queries, transpose_b=True))
# TODO(b/171063452): Bug needs to be fixed to handle this correctly.
# seq_dim = tf.shape(result)[1]
# result = tf.reshape(result, [1, seq_dim, seq_dim])
return result
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Layers for QRNN."""
import tensorflow as tf
from layers import base_layers # import seq_flow_lite module
from layers import conv_layers # import seq_flow_lite module
from layers import dense_layers # import seq_flow_lite module
from layers import quantization_layers # import seq_flow_lite module
from tf_ops import tf_custom_ops_py # import seq_flow_lite module
QUASI_RNN_POOLING_F = "f"
QUASI_RNN_POOLING_FO = "fo"
QUASI_RNN_POOLING_IFO = "ifo"
_QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP = {
QUASI_RNN_POOLING_F: 2,
QUASI_RNN_POOLING_FO: 3,
QUASI_RNN_POOLING_IFO: 4,
}
class QRNNUnidirectionalPoolingCore(base_layers.BaseLayer):
"""Create a unidirectional QRNN pooling inner loop."""
def __init__(self, forward=True, **kwargs):
self.forward = forward
super(QRNNUnidirectionalPoolingCore, self).__init__(**kwargs)
def call(self, multiplier, constant):
if self.parameters.mode != base_layers.TFLITE:
return self._qrnn_pooling(multiplier, constant)
else:
return tf_custom_ops_py.pooling_op(multiplier, constant,
[1.0 if self.forward else 0.0])
def _qrnn_pooling(self, multipler, constant):
"""Pooling step computes the internal states for all timesteps."""
assert multipler.get_shape().as_list() == constant.get_shape().as_list()
gate_static_shape = multipler.get_shape().as_list()
gate_shape = tf.shape(multipler)
feature_size = gate_static_shape[2]
assert feature_size is not None
batch_size = gate_static_shape[0] or gate_shape[0]
max_timestep = gate_static_shape[1] or gate_shape[1]
dynamic_loop = gate_static_shape[1] is None
# Get multiplier/constant in [timestep, batch, feature_size] format
multiplier_transposed = tf.transpose(multipler, [1, 0, 2])
constant_transposed = tf.transpose(constant, [1, 0, 2])
# Start state
state = tf.zeros((batch_size, feature_size), tf.float32)
if dynamic_loop:
# One pooling step
def _step(index, state, states):
m = multiplier_transposed[index, :, :]
c = constant_transposed[index, :, :]
new_state = state * m + c
next_index = index + 1 if self.forward else index - 1
return next_index, new_state, states.write(index, new_state)
# Termination condition
def _termination(index, state, states):
del state, states
return (index < max_timestep) if self.forward else (index >= 0)
states = tf.TensorArray(tf.float32, size=max_timestep)
index = 0 if self.forward else max_timestep - 1
# Dynamic pooling loop
_, state, states = tf.while_loop(_termination, _step,
[index, state, states])
states = states.stack()
else:
# Unstack them to process one timestep at a time
multiplier_list = tf.unstack(multiplier_transposed)
constant_list = tf.unstack(constant_transposed)
states = []
# Unroll either forward or backward based on the flag `forward`
timesteps = list(range(max_timestep)) if self.forward else reversed(
list(range(max_timestep)))
# Static pooling loop
for time in timesteps:
state = state * multiplier_list[time] + constant_list[time]
states.append(state)
# Stack them back in the right order
states = tf.stack(states if self.forward else list(reversed(states)))
# Change to [batch, timestep, feature_size]
return tf.transpose(states, [1, 0, 2])
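# Example (sketch, not part of this change): the recurrence that the pooling
# core evaluates, state_t = state_{t-1} * multiplier_t + constant_t, written
# as a plain forward Python loop over the time axis.
def _qrnn_pooling_sketch(multiplier, constant):
  """Illustrative only; expects [batch, time, features] tensors with a
  statically known time dimension."""
  state = tf.zeros_like(multiplier[:, 0, :])
  states = []
  for t in range(multiplier.shape[1]):
    state = state * multiplier[:, t, :] + constant[:, t, :]
    states.append(state)
  return tf.stack(states, axis=1)  # [batch, time, features]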
class QRNNUnidirectionalPooling(base_layers.BaseLayer):
"""Create a unidirectional QRNN pooling."""
def __init__(self,
zoneout_probability=0.0,
forward=True,
pooling=QUASI_RNN_POOLING_FO,
output_quantized=True,
**kwargs):
self.zoneout_probability = zoneout_probability
self.pooling = pooling
self.forward = forward
self.output_quantized = output_quantized
if output_quantized and self.pooling == QUASI_RNN_POOLING_IFO:
self.qoutputs = quantization_layers.ActivationQuantization()
self.num_gates = _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP[pooling]
assert pooling in _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP.keys()
self.pooling_core = QRNNUnidirectionalPoolingCore(forward=forward, **kwargs)
super(QRNNUnidirectionalPooling, self).__init__(**kwargs)
def call(self, gates, mask):
return self._create_qrnn_pooling_unidirectional(gates, mask)
def _qrnn_preprocess(self, gates):
"""Preprocess the gate inputs to the pooling layer."""
assert self.num_gates == len(gates)
dim = lambda tensor, index: tensor.get_shape().as_list()[index]
for tensor in gates:
assert len(tensor.get_shape().as_list()) == 3
for idx in range(3):
assert dim(gates[0], idx) == dim(tensor, idx)
if self.pooling == QUASI_RNN_POOLING_F:
z = self.quantized_tanh(gates[0], tf_only=True)
f = self.quantized_sigmoid(gates[1], tf_only=True)
return f, self.qrange_tanh(self.qrange_sigmoid(1 - f) * z), 1
elif self.pooling == QUASI_RNN_POOLING_FO:
z = self.quantized_tanh(gates[0], tf_only=True)
f = self.quantized_sigmoid(gates[1], tf_only=True)
o = self.quantized_sigmoid(gates[2], tf_only=True)
return f, self.qrange_tanh(self.qrange_sigmoid(1 - f) * z), o
else: # self.pooling == QUASI_RNN_POOLING_IFO:
z = self.quantized_tanh(gates[0], tf_only=True)
i = self.quantized_sigmoid(gates[1], tf_only=True)
f = self.quantized_sigmoid(gates[2], tf_only=True)
o = self.quantized_sigmoid(gates[3], tf_only=True)
return f, self.qrange_tanh(i * z), o
def _qrnn_postprocess(self, states, multiplier):
"""Postprocess the states and return the output tensors."""
if self.pooling == QUASI_RNN_POOLING_F:
return states
elif self.pooling == QUASI_RNN_POOLING_FO:
return self.qrange_tanh(states) * multiplier
else: # self.pooling == QUASI_RNN_POOLING_IFO
return self.qoutputs(states) * multiplier
def _qrnn_zoneout(self, multipler, constant):
"""Zoneout regularization for Quasi RNN."""
enable_zoneout = self.zoneout_probability > 0.0
if enable_zoneout and self.parameters.mode == base_layers.TRAIN:
# zoneout_mask is 1.0 with probability self.zoneout_probability and 0.0
# with probability (1 - self.zoneout_probability).
zoneout_mask = tf.random.uniform(tf.shape(multipler), maxval=1.0)
zoneout_mask = tf.floor(zoneout_mask + self.zoneout_probability)
# When zoneout_mask is 1.0 the state is not updated and the old state is
# retained; this is achieved by forcing the multiplier to 1.0 and the
# constant to 0.0. When zoneout_mask is 0.0 the multiplier and constant
# are unaffected. The multiplier is expected to lie in [0.0, 1.0], which
# holds since it is the output of a sigmoid.
multipler = tf.maximum(zoneout_mask, multipler)
constant *= (1 - zoneout_mask)
return multipler, constant
def _create_qrnn_pooling_unidirectional(self, gates, mask):
"""Create QRNN Pooling in either forward or backward direction."""
m1, c1, outgate = self._qrnn_preprocess(gates)
# At inference time zero padding is not used, so masking by sequence
# length is unnecessary.
if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
m1 = m1 * mask + (1 - mask) * tf.ones_like(m1)
c1 *= mask
m1, c1 = self._qrnn_zoneout(m1, c1)
states = self.pooling_core(m1, c1)
outputs = self._qrnn_postprocess(states, outgate)
# At inference time zero padding is not used, so masking by sequence
# length is unnecessary.
if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
outputs *= mask
if self.output_quantized:
if self.pooling in [QUASI_RNN_POOLING_FO, QUASI_RNN_POOLING_F]:
outputs = self.qrange_tanh(outputs)
else:
outputs = self.qoutputs.quantize_using_range(outputs)
return outputs
class QRNNUnidirectional(base_layers.BaseLayer):
"""Create a unidirectional QRNN encoder."""
def __init__(self,
kwidth,
state_size,
zoneout_probability=0.0,
forward=True,
pooling=QUASI_RNN_POOLING_FO,
output_quantized=True,
**kwargs):
self.forward = forward
self.kwidth = kwidth
self.pooling = pooling
self.state_size = state_size
assert pooling in _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP.keys()
self.num_gates = _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP[pooling]
self.gate_layers = []
for _ in range(self.num_gates):
self.gate_layers.append(
conv_layers.EncoderQConvolutionVarLen(
filters=state_size,
ksize=kwidth,
rank=3,
padding="VALID",
activation=None,
**kwargs))
padding = [kwidth - 1, 0] if forward else [0, kwidth - 1]
self.zero_pad = tf.keras.layers.ZeroPadding1D(padding=padding)
self.qrnn_pooling = QRNNUnidirectionalPooling(
forward=forward,
zoneout_probability=zoneout_probability,
output_quantized=output_quantized,
pooling=pooling,
**kwargs)
super(QRNNUnidirectional, self).__init__(**kwargs)
def call(self, inputs, mask, inverse_normalizer=None):
if inverse_normalizer is None:
inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
self._assert_rank_and_type(inputs, 3)
self._assert_rank_and_type(mask, 3)
maskr4 = tf.expand_dims(mask, axis=1)
padded_inputs = self.zero_pad(inputs)
gates = [
layer(padded_inputs, maskr4, inverse_normalizer)
for layer in self.gate_layers
]
return self.qrnn_pooling(gates, mask)
class QRNNUnidirectionalWithBottleneck(base_layers.BaseLayer):
"""Create a unidirectional QRNN encoder with bottlenecks."""
def __init__(self,
kwidth,
state_size,
bottleneck_size,
zoneout_probability=0.0,
forward=True,
pooling=QUASI_RNN_POOLING_FO,
output_quantized=True,
**kwargs):
self.bottleneck_size = bottleneck_size
self.state_size = state_size
self.forward = forward
self.kwidth = kwidth
self.pooling = pooling
self.state_size = state_size
assert pooling in _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP.keys()
self.num_gates = _QUASI_RNN_POOLING_TO_NUMBER_OF_GATES_MAP[pooling]
self.qrnn_pooling = QRNNUnidirectionalPooling(
forward=forward,
zoneout_probability=zoneout_probability,
output_quantized=output_quantized,
pooling=pooling,
**kwargs)
self.pre_conv_layers = []
self.gate_layers = []
self.post_conv_layers = []
for _ in range(self.num_gates):
self.pre_conv_layers.append(
dense_layers.BaseQDense(bottleneck_size, rank=3, **kwargs))
self.gate_layers.append(
conv_layers.EncoderQConvolution(
filters=bottleneck_size,
ksize=kwidth,
rank=3,
padding="SAME",
**kwargs))
self.post_conv_layers.append(
dense_layers.BaseQDense(
state_size, rank=3, activation=None, **kwargs))
super(QRNNUnidirectionalWithBottleneck, self).__init__(**kwargs)
def call(self, inputs, mask, inverse_normalizer=None):
if inverse_normalizer is None:
inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
self._assert_rank_and_type(inputs, 3)
self._assert_rank_and_type(mask, 3)
pre_conv_out = [layer(inputs) for layer in self.pre_conv_layers]
gates = [layer(pre_conv_out[i]) for i, layer in enumerate(self.gate_layers)]
post_conv_out = [
layer(gates[i]) for i, layer in enumerate(self.post_conv_layers)
]
return self.qrnn_pooling(post_conv_out, mask)
class QRNNBidirectional(base_layers.BaseLayer):
"""Create a bidirectional QRNN encoder."""
def __init__(self,
kwidth,
state_size,
zoneout_probability=0.0,
pooling=QUASI_RNN_POOLING_FO,
bottleneck_size=None,
**kwargs):
self.pooling = pooling
if bottleneck_size is None:
self.forward = QRNNUnidirectional(
kwidth=kwidth,
state_size=state_size,
forward=True,
output_quantized=False,
zoneout_probability=zoneout_probability,
pooling=pooling,
**kwargs)
self.backward = QRNNUnidirectional(
kwidth=kwidth,
state_size=state_size,
forward=False,
output_quantized=False,
zoneout_probability=zoneout_probability,
pooling=pooling,
**kwargs)
else:
self.forward = QRNNUnidirectionalWithBottleneck(
kwidth=kwidth,
state_size=state_size,
bottleneck_size=bottleneck_size,
forward=True,
output_quantized=False,
zoneout_probability=zoneout_probability,
pooling=pooling,
**kwargs)
self.backward = QRNNUnidirectionalWithBottleneck(
kwidth=kwidth,
state_size=state_size,
bottleneck_size=bottleneck_size,
forward=False,
output_quantized=False,
zoneout_probability=zoneout_probability,
pooling=pooling,
**kwargs)
self.qconcat = quantization_layers.ConcatQuantization(axis=2, **kwargs)
super(QRNNBidirectional, self).__init__(**kwargs)
def call(self, inputs, mask, inverse_normalizer=None):
if inverse_normalizer is None:
inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
fwd_outputs = self.forward(inputs, mask, inverse_normalizer)
bwd_outputs = self.backward(inputs, mask, inverse_normalizer)
if self.pooling in [QUASI_RNN_POOLING_FO, QUASI_RNN_POOLING_F]:
outputs = [self.qrange_tanh(fwd_outputs), self.qrange_tanh(bwd_outputs)]
outputs = self.qrange_tanh(tf.concat(outputs, axis=2))
else:
outputs = self.qconcat([fwd_outputs, bwd_outputs])
return outputs
class QRNNBidirectionalStack(base_layers.BaseLayer):
"""Create a stack of bidirectional QRNN encoder."""
def __init__(self,
num_layers,
kwidth,
state_size,
zoneout_probability=0.0,
layerwise_decaying_zoneout=True,
pooling=QUASI_RNN_POOLING_FO,
bottleneck_size=None,
**kwargs):
self.layers = []
zp = zoneout_probability
for idx in range(num_layers):
if layerwise_decaying_zoneout:
zp = (zoneout_probability**(idx + 1))
self.layers.append(
QRNNBidirectional(
kwidth=kwidth,
state_size=state_size,
zoneout_probability=zp,
pooling=pooling,
bottleneck_size=bottleneck_size,
**kwargs))
super(QRNNBidirectionalStack, self).__init__(**kwargs)
def call(self, inputs, maskr3, inverse_normalizer):
return self._apply_qrnn_stack(inputs, maskr3, inverse_normalizer)
def _apply_qrnn_stack(self, inputs, mask3, inverse_normalizer):
if self.parameters.mode not in [base_layers.PREDICT, base_layers.TFLITE]:
inputs = inputs * mask3
for layer in self.layers:
outputs = layer(inputs, mask3, inverse_normalizer)
inputs = outputs
return outputs
class QRNNBidirectionalStackWithSeqLength(QRNNBidirectionalStack):
def call(self, inputs, sequence_length):
mask = tf.sequence_mask(
sequence_length, tf.shape(inputs)[1], dtype=tf.float32)
inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
maskr3 = tf.expand_dims(mask, 2)
return self._apply_qrnn_stack(inputs, maskr3, inverse_normalizer)
......@@ -20,3 +20,21 @@ py_library(
"//tf_ops:tf_custom_ops_py", # sequence projection
],
)
py_library(
name = "pqrnn",
srcs = ["pqrnn.py"],
srcs_version = "PY3",
deps = [
# package absl/logging
# package tensorflow
"//layers:base_layers", # sequence projection
"//layers:dense_layers", # sequence projection
"//layers:misc_layers", # sequence projection
"//layers:projection_layers", # sequence projection
"//layers:qrnn_layers", # sequence projection
"//layers:quantization_layers", # sequence projection
# "//tf_ops:tf_custom_ops" # sequence projection
"//tf_ops:tf_custom_ops_py", # sequence projection
],
)
# Copyright 2020 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Lint as: python3
"""Implementation of pQRNN model."""
from absl import logging
import tensorflow as tf
from layers import base_layers # import seq_flow_lite module
from layers import dense_layers # import seq_flow_lite module
from layers import misc_layers # import seq_flow_lite module
from layers import projection_layers # import seq_flow_lite module
from layers import qrnn_layers # import seq_flow_lite module
from layers import quantization_layers # import seq_flow_lite module
class Encoder(tf.keras.layers.Layer):
"""A pQRNN keras model."""
def __init__(self, config, mode, **kwargs):
super(Encoder, self).__init__(**kwargs)
def _get_params(varname, default_value=None):
value = config[varname] if varname in config else default_value
default = "" if varname in config else " (default)"
logging.info("%s = %s%s", varname, value, default)
setattr(self, varname, value)
_get_params("projection_bottleneck_size")
_get_params("qrnn_state_size")
_get_params("qrnn_kernel_width", 3)
_get_params("qrnn_zoneout_probability")
_get_params("number_qrnn_layers")
_get_params("labels")
_get_params("regularizer_scale")
_get_params("quantize")
self.num_classes = len(self.labels)
self.parameters = base_layers.Parameters(
mode, quantize=self.quantize, regularizer_scale=self.regularizer_scale)
self.bottleneck_layer = dense_layers.BaseQDenseVarLen(
units=self.projection_bottleneck_size,
rank=3,
parameters=self.parameters)
self.qrnn_stack = qrnn_layers.QRNNBidirectionalStack(
parameters=self.parameters,
zoneout_probability=self.qrnn_zoneout_probability,
kwidth=self.qrnn_kernel_width,
state_size=self.qrnn_state_size,
num_layers=self.number_qrnn_layers)
self.attention_pool = misc_layers.AttentionPooling(
parameters=self.parameters)
self.final_fc = dense_layers.BaseQDense(
units=self.num_classes,
rank=2,
parameters=self.parameters,
activation=None)
def call(self, projection, seq_length):
mask = tf.sequence_mask(
seq_length, tf.shape(projection)[1], dtype=tf.float32)
inverse_normalizer = tf.math.reciprocal(tf.reduce_sum(mask))
maskr3 = tf.expand_dims(mask, axis=2)
if self.parameters.mode in [base_layers.TRAIN, base_layers.EVAL]:
projection = projection * maskr3
bottleneck = self.bottleneck_layer(projection, maskr3, inverse_normalizer)
outputs = self.qrnn_stack(bottleneck, maskr3, inverse_normalizer)
pre_logits = self.attention_pool(outputs, maskr3, inverse_normalizer)
return self.final_fc(pre_logits)
class Model(Encoder):
def __init__(self, config, mode, **kwargs):
super(Model, self).__init__(config, mode, **kwargs)
self.projection = projection_layers.ProjectionLayer(config, mode)
def call(self, inputs):
projection, seq_length = self.projection(inputs)
return super(Model, self).call(projection, seq_length)
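# Example (sketch, not part of this change): a hypothetical config dict with
# the keys read by _get_params above (qrnn_kernel_width is omitted to use its
# default of 3), and a forward pass through the Encoder on an already
# projected batch. The values are placeholders, not tuned settings.
def _pqrnn_encoder_sketch():
  config = {
      "projection_bottleneck_size": 64,
      "qrnn_state_size": 128,
      "qrnn_zoneout_probability": 0.5,
      "number_qrnn_layers": 2,
      "labels": ["negative", "positive"],
      "regularizer_scale": 1e-4,
      "quantize": False,
  }
  encoder = Encoder(config, base_layers.TRAIN)
  projection = tf.random.normal([2, 16, 64])          # [batch, time, features]
  return encoder(projection, tf.constant([12, 16]))   # logits, [batch, classes]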
......@@ -90,6 +90,7 @@ py_binary(
main = "run_tflite.py",
python_version = "PY3",
deps = [
":sgnn_projection_op_resolver",
# Expect numpy installed
# package TFLite flex delegate
# package TFLite interpreter
......
......@@ -43,8 +43,6 @@ Hparams = collections.namedtuple(
def preprocess(text):
"""Normalize the text, and return tokens."""
assert len(text.get_shape().as_list()) == 2
assert text.get_shape().as_list()[-1] == 1
text = tf.reshape(text, [-1])
text = tf_text.case_fold_utf8(text)
tokenizer = tflite_text_api.WhitespaceTokenizer()
......
......@@ -69,3 +69,27 @@ REGISTER_OP("LayerNorm")
.Doc(R"doc(
Dummy layer norm op.
)doc");
class PoolingOp : public tensorflow::OpKernel {
public:
explicit PoolingOp(tensorflow::OpKernelConstruction* context)
: tensorflow::OpKernel(context) {}
void Compute(tensorflow::OpKernelContext* ctx) override {}
};
REGISTER_KERNEL_BUILDER(Name("PoolingOp").Device(::tensorflow::DEVICE_CPU),
PoolingOp);
REGISTER_OP("PoolingOp")
.Input("multiplier: float32")
.Input("constant: float32")
.Input("forward: float32")
.Output("state: float32")
.SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
c->set_output(0, c->input(0));
return tensorflow::Status::OK();
})
.Doc(R"doc(
Dummy pooling op.
)doc");
......@@ -80,6 +80,7 @@ def set_output_quantized_for_custom_ops(graph_def, use_mlir=True):
'ExpectedValueOp': [tf.float32.as_datatype_enum],
'LayerNorm': [tf.float32.as_datatype_enum],
'UniformCausalAttn': [tf.float32.as_datatype_enum],
'DynamicUniformCausalAttn': [tf.float32.as_datatype_enum],
'RnnDecoderReadState': [tf.float32.as_datatype_enum],
'RnnDecoderWriteState': [tf.float32.as_datatype_enum],
}
......