Unverified Commit 09d9656f authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
...@@ -76,6 +76,16 @@ class SegmentationHead(hyperparams.Config): ...@@ -76,6 +76,16 @@ class SegmentationHead(hyperparams.Config):
decoder_max_level: Optional[Union[int, str]] = None decoder_max_level: Optional[Union[int, str]] = None
@dataclasses.dataclass
class MaskScoringHead(hyperparams.Config):
  """Mask Scoring head config.

  Used as the type of the optional `mask_scoring_head` field of
  `SemanticSegmentationModel`, where it defaults to None.
  """
  # NOTE(review): the code that consumes these fields is not visible here; the
  # per-field descriptions below are inferred from the names — confirm against
  # the head implementation.
  # Presumably the number of convolution layers in the head.
  num_convs: int = 4
  # Presumably the number of filters used by those convolution layers.
  num_filters: int = 128
  # Defaults to a fresh empty list per instance (via default_factory).
  # Presumably the spatial size fed to the fully connected layers — confirm.
  fc_input_size: List[int] = dataclasses.field(default_factory=list)
  # Presumably the number of fully connected layers.
  num_fcs: int = 2
  # Presumably the width of each fully connected layer.
  fc_dims: int = 1024
@dataclasses.dataclass @dataclasses.dataclass
class SemanticSegmentationModel(hyperparams.Config): class SemanticSegmentationModel(hyperparams.Config):
"""Semantic segmentation model config.""" """Semantic segmentation model config."""
...@@ -87,6 +97,7 @@ class SemanticSegmentationModel(hyperparams.Config): ...@@ -87,6 +97,7 @@ class SemanticSegmentationModel(hyperparams.Config):
backbone: backbones.Backbone = backbones.Backbone( backbone: backbones.Backbone = backbones.Backbone(
type='resnet', resnet=backbones.ResNet()) type='resnet', resnet=backbones.ResNet())
decoder: decoders.Decoder = decoders.Decoder(type='identity') decoder: decoders.Decoder = decoders.Decoder(type='identity')
mask_scoring_head: Optional[MaskScoringHead] = None
norm_activation: common.NormActivation = common.NormActivation() norm_activation: common.NormActivation = common.NormActivation()
......
#!/bin/bash
# Downloads the COCO 2017 images plus instance and panoptic annotations into
# DATA_DIR, unpacks them, and converts the val and train splits to TFRecords
# (with instance masks and panoptic masks) under DATA_DIR/tfrecords.
#
# Usage: <this script> DATA_DIR
#
# The converter is invoked through a relative path
# (official/vision/beta/data/create_coco_tf_record.py), so run this script
# from the directory that contains `official/`.

# Abort on the first failing command or unset variable so that a failed
# download/unzip never falls through to the conversion step.
set -euo pipefail

# Without this guard an omitted argument would make every path below resolve
# against the filesystem root (e.g. "/zips", "/*.zip").
if [[ $# -lt 1 || -z "$1" ]]; then
  echo "Usage: $0 DATA_DIR" >&2
  exit 1
fi
DATA_DIR=$1

sudo apt update
sudo apt install unzip aria2 -y

mkdir -p "$DATA_DIR"

aria2c -j 8 -Z \
  http://images.cocodataset.org/annotations/annotations_trainval2017.zip \
  http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip \
  http://images.cocodataset.org/zips/train2017.zip \
  http://images.cocodataset.org/zips/val2017.zip \
  --dir="$DATA_DIR"

# The wildcard is quoted on purpose: unzip expands it itself, which lets a
# single invocation extract every downloaded archive.
unzip "$DATA_DIR/*.zip" -d "$DATA_DIR"

# Move the downloaded archives out of the way; -p keeps re-runs from failing
# when the directory already exists.
mkdir -p "$DATA_DIR/zips"
mv "$DATA_DIR"/*.zip "$DATA_DIR/zips"

# The panoptic annotation archive contains nested archives with the PNG masks.
unzip "$DATA_DIR/annotations/panoptic_train2017.zip" -d "$DATA_DIR"
unzip "$DATA_DIR/annotations/panoptic_val2017.zip" -d "$DATA_DIR"

python3 official/vision/beta/data/create_coco_tf_record.py \
  --logtostderr \
  --image_dir="$DATA_DIR/val2017" \
  --object_annotations_file="$DATA_DIR/annotations/instances_val2017.json" \
  --output_file_prefix="$DATA_DIR/tfrecords/val" \
  --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_val2017.json" \
  --panoptic_masks_dir="$DATA_DIR/panoptic_val2017" \
  --num_shards=8 \
  --include_masks \
  --include_panoptic_masks

python3 official/vision/beta/data/create_coco_tf_record.py \
  --logtostderr \
  --image_dir="$DATA_DIR/train2017" \
  --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \
  --output_file_prefix="$DATA_DIR/tfrecords/train" \
  --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \
  --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \
  --num_shards=32 \
  --include_masks \
  --include_panoptic_masks
...@@ -55,7 +55,7 @@ class Parser(object): ...@@ -55,7 +55,7 @@ class Parser(object):
is_training: a `bool` to indicate whether it is in training mode. is_training: a `bool` to indicate whether it is in training mode.
Returns: Returns:
parse: a `callable` that takes the serialized examle and generate the parse: a `callable` that takes the serialized example and generate the
images, labels tuple where labels is a dict of Tensors that contains images, labels tuple where labels is a dict of Tensors that contains
labels. labels.
""" """
......
...@@ -19,11 +19,13 @@ into (image, labels) tuple for RetinaNet. ...@@ -19,11 +19,13 @@ into (image, labels) tuple for RetinaNet.
""" """
# Import libraries # Import libraries
from absl import logging
import tensorflow as tf import tensorflow as tf
from official.vision.beta.dataloaders import parser from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import utils from official.vision.beta.dataloaders import utils
from official.vision.beta.ops import anchor from official.vision.beta.ops import anchor
from official.vision.beta.ops import augment
from official.vision.beta.ops import box_ops from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops from official.vision.beta.ops import preprocess_ops
...@@ -40,6 +42,7 @@ class Parser(parser.Parser): ...@@ -40,6 +42,7 @@ class Parser(parser.Parser):
anchor_size, anchor_size,
match_threshold=0.5, match_threshold=0.5,
unmatched_threshold=0.5, unmatched_threshold=0.5,
aug_type=None,
aug_rand_hflip=False, aug_rand_hflip=False,
aug_scale_min=1.0, aug_scale_min=1.0,
aug_scale_max=1.0, aug_scale_max=1.0,
...@@ -71,6 +74,8 @@ class Parser(parser.Parser): ...@@ -71,6 +74,8 @@ class Parser(parser.Parser):
unmatched_threshold: `float` number between 0 and 1 representing the unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative. with a score below the threshold is labeled negative.
aug_type: An optional Augmentation object selecting the augmentation policy.
Only AutoAugment (type 'autoaug') is supported; any other type, including
RandAugment, raises ValueError.
aug_rand_hflip: `bool`, if True, augment training with random horizontal aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip. flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for aug_scale_min: `float`, the minimum scale applied to `output_size` for
...@@ -108,7 +113,20 @@ class Parser(parser.Parser): ...@@ -108,7 +113,20 @@ class Parser(parser.Parser):
self._aug_scale_min = aug_scale_min self._aug_scale_min = aug_scale_min
self._aug_scale_max = aug_scale_max self._aug_scale_max = aug_scale_max
# Data Augmentation with AutoAugment. # Data augmentation with AutoAugment or RandAugment.
self._augmenter = None
if aug_type is not None:
if aug_type.type == 'autoaug':
logging.info('Using AutoAugment.')
self._augmenter = augment.AutoAugment(
augmentation_name=aug_type.autoaug.augmentation_name,
cutout_const=aug_type.autoaug.cutout_const,
translate_const=aug_type.autoaug.translate_const)
else:
# TODO(b/205346436) Support RandAugment.
raise ValueError(f'Augmentation policy {aug_type.type} not supported.')
# Deprecated. Data Augmentation with AutoAugment.
self._use_autoaugment = use_autoaugment self._use_autoaugment = use_autoaugment
self._autoaugment_policy_name = autoaugment_policy_name self._autoaugment_policy_name = autoaugment_policy_name
...@@ -138,9 +156,13 @@ class Parser(parser.Parser): ...@@ -138,9 +156,13 @@ class Parser(parser.Parser):
for k, v in attributes.items(): for k, v in attributes.items():
attributes[k] = tf.gather(v, indices) attributes[k] = tf.gather(v, indices)
# Gets original image and its size. # Gets original image.
image = data['image'] image = data['image']
# Apply autoaug or randaug.
if self._augmenter is not None:
image, boxes = self._augmenter.distort_with_boxes(image, boxes)
image_shape = tf.shape(input=image)[0:2] image_shape = tf.shape(input=image)[0:2]
# Normalizes image with mean and std pixel values. # Normalizes image with mean and std pixel values.
......
...@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder ...@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder
def _generate_source_id(image_bytes): def _generate_source_id(image_bytes):
# Hashing using 22 bits since float32 has only 23 mantissa bits.
return tf.strings.as_string( return tf.strings.as_string(
tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 63 - 1)) tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 22 - 1))
class TfExampleDecoder(decoder.Decoder): class TfExampleDecoder(decoder.Decoder):
......
...@@ -14,24 +14,13 @@ ...@@ -14,24 +14,13 @@
"""Tests for tf_example_decoder.py.""" """Tests for tf_example_decoder.py."""
import io
# Import libraries # Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
from PIL import Image
import tensorflow as tf import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tfexample_utils
DUMP_SOURCE_ID = b'123'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
...@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_decoder.TfExampleDecoder( decoder = tf_example_decoder.TfExampleDecoder(
include_mask=True, regenerate_source_id=regenerate_source_id) include_mask=True, regenerate_source_id=regenerate_source_id)
image = _encode_image( serialized_example = tfexample_utils.create_detection_test_example(
np.uint8(np.random.rand(image_height, image_width, 3) * 255), image_height=image_height,
fmt='JPEG') image_width=image_width,
if num_instances == 0: image_channel=3,
xmins = [] num_instances=num_instances).SerializeToString()
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
if not regenerate_source_id: if not regenerate_source_id:
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -172,46 +99,38 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -172,46 +99,38 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/label': (tf.train.Feature(
'image/object/bbox/ymin': ( int64_list=tf.train.Int64List(value=labels))),
tf.train.Feature( 'image/object/is_crowd': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/bbox/ymax': ( 'image/object/area': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=areas))),
float_list=tf.train.FloatList(value=ymaxs))), 'image/object/mask': (tf.train.Feature(
'image/object/class/label': ( bytes_list=tf.train.BytesList(value=masks))),
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -276,40 +195,34 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -276,40 +195,34 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/label': (tf.train.Feature(
'image/object/bbox/ymin': ( int64_list=tf.train.Int64List(value=labels))),
tf.train.Feature( 'image/object/mask': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), bytes_list=tf.train.BytesList(value=masks))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(serialized_example)) tf.convert_to_tensor(serialized_example))
...@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
......
...@@ -14,28 +14,19 @@ ...@@ -14,28 +14,19 @@
"""Tests for tf_example_label_map_decoder.py.""" """Tests for tf_example_label_map_decoder.py."""
import io
import os import os
# Import libraries # Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
from PIL import Image
import tensorflow as tf import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_label_map_decoder from official.vision.beta.dataloaders import tf_example_label_map_decoder
from official.vision.beta.dataloaders import tfexample_utils
DUMP_SOURCE_ID = b'123'
LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2' LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters( @parameterized.parameters(
...@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap( decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
label_map_path, include_mask=True) label_map_path, include_mask=True)
image = _encode_image( serialized_example = tfexample_utils.create_detection_test_example(
np.uint8(np.random.rand(image_height, image_width, 3) * 255), image_height=image_height,
fmt='JPEG') image_width=image_width,
if num_instances == 0: image_channel=3,
xmins = [] num_instances=num_instances).SerializeToString()
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
labels = [b'class_1'] * num_instances
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -183,46 +111,38 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -183,46 +111,38 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/text': (tf.train.Feature(
'image/object/bbox/ymin': ( bytes_list=tf.train.BytesList(value=labels))),
tf.train.Feature( 'image/object/is_crowd': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/bbox/ymax': ( 'image/object/area': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=areas))),
float_list=tf.train.FloatList(value=ymaxs))), 'image/object/mask': (tf.train.Feature(
'image/object/class/text': ( bytes_list=tf.train.BytesList(value=masks))),
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
......
...@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory ...@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory
class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def _create_test_example(self):
  """Returns one feature dict shared by the decoder tests in this class.

  The same dict is handed to the classification, detection and segmentation
  decoders, so it carries the keys for all three: an image, a scalar label,
  an image id, a single-object `objects` entry, a segmentation label map and
  an `image_left` image. All image-like values are all-ones uint8 tensors.
  """
  # Single object annotation; the box holds four fractional coordinates.
  single_object = {
      'label': 1,
      'is_crowd': 0,
      'area': 0.5,
      'bbox': [0.1, 0.2, 0.3, 0.4]
  }
  return {
      'image': tf.ones(shape=(100, 100, 3), dtype=tf.uint8),
      'label': 1,
      'image/id': 0,
      'objects': single_object,
      'segmentation_label': tf.ones((100, 100, 1), dtype=tf.uint8),
      'image_left': tf.ones(shape=(100, 100, 3), dtype=tf.uint8)
  }
@parameterized.parameters( @parameterized.parameters(
('imagenet2012'), ('imagenet2012'),
('cifar10'), ('cifar10'),
...@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_classification_decoder(self, tfds_name): def test_classification_decoder(self, tfds_name):
decoder = tfds_factory.get_classification_decoder(tfds_name) decoder = tfds_factory.get_classification_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 2)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/class/label', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('flowers'), ('flowers'),
...@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_detection_decoder(self, tfds_name): def test_detection_decoder(self, tfds_name):
decoder = tfds_factory.get_detection_decoder(tfds_name) decoder = tfds_factory.get_detection_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 8)
self.assertIn('image', decoded_tensor)
self.assertIn('source_id', decoded_tensor)
self.assertIn('height', decoded_tensor)
self.assertIn('width', decoded_tensor)
self.assertIn('groundtruth_classes', decoded_tensor)
self.assertIn('groundtruth_is_crowd', decoded_tensor)
self.assertIn('groundtruth_area', decoded_tensor)
self.assertIn('groundtruth_boxes', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('pascal'), ('pascal'),
...@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_segmentation_decoder(self, tfds_name): def test_segmentation_decoder(self, tfds_name):
decoder = tfds_factory.get_segmentation_decoder(tfds_name) decoder = tfds_factory.get_segmentation_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 4)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/segmentation/class/encoded', decoded_tensor)
self.assertIn('image/height', decoded_tensor)
self.assertIn('image/width', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('coco'), ('coco'),
......
...@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded' ...@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded'
CLASSIFICATION_LABEL_KEY = 'image/class/label' CLASSIFICATION_LABEL_KEY = 'image/class/label'
LABEL_KEY = 'clip/label/index' LABEL_KEY = 'clip/label/index'
AUDIO_KEY = 'features/audio' AUDIO_KEY = 'features/audio'
DUMP_SOURCE_ID = b'123'
def make_image_bytes(shape: Sequence[int]): def encode_image(image_array: np.array, fmt: str) -> bytes:
"""Generates image and return bytes in JPEG format.""" image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
def make_image_bytes(shape: Sequence[int], fmt: str = 'JPEG') -> bytes:
"""Generates image and return bytes in specified format."""
random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8) random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8)
random_image = Image.fromarray(random_image) return encode_image(random_image, fmt=fmt)
with io.BytesIO() as buffer:
random_image.save(buffer, format='JPEG')
raw_image_bytes = buffer.getvalue()
return raw_image_bytes
def put_int64_to_context(seq_example: tf.train.SequenceExample, def put_int64_to_context(seq_example: tf.train.SequenceExample,
...@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int, ...@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int,
bytes_list=tf.train.BytesList(value=[labels.tobytes()]))) bytes_list=tf.train.BytesList(value=[labels.tobytes()])))
} }
return tf.train.Example(features=tf.train.Features(feature=feature)) return tf.train.Example(features=tf.train.Features(feature=feature))
def create_detection_test_example(image_height: int, image_width: int,
                                  image_channel: int,
                                  num_instances: int) -> tf.train.Example:
  """Builds a synthetic detection `tf.train.Example` with boxes and masks.

  The image, box coordinates, class ids and instance masks are all random.
  Each box coordinate is sampled independently, so a box's min coordinate can
  exceed its max and the derived area can be negative; treat the annotations
  as placeholder values rather than geometrically valid boxes.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.
    num_instances: The number of object instances per image. With 0, every
      per-instance feature is written as an empty list.

  Returns:
    A tf.train.Example for testing.
  """

  def _bytes(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))

  def _ints(values):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

  def _floats(values):
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

  encoded_image = make_image_bytes([image_height, image_width, image_channel])

  # Defaults describe an image without any annotated instance.
  xmins, xmaxs, ymins, ymaxs = [], [], [], []
  class_ids, class_names, areas = [], [], []
  crowd_flags, encoded_masks = [], []

  if num_instances != 0:
    # The sampling order below determines how the NumPy RNG stream is used.
    xmins = list(np.random.rand(num_instances))
    xmaxs = list(np.random.rand(num_instances))
    ymins = list(np.random.rand(num_instances))
    ymaxs = list(np.random.rand(num_instances))
    class_names = [b'class_1'] * num_instances
    class_ids = list(np.random.randint(100, size=num_instances))
    # Normalized box extent scaled to pixel units.
    areas = [
        (right - left) * (bottom - top) * image_height * image_width
        for left, right, top, bottom in zip(xmins, xmaxs, ymins, ymaxs)
    ]
    crowd_flags = [0] * num_instances
    # One PNG-encoded random mask per instance.
    encoded_masks = [
        make_image_bytes([image_height, image_width], fmt='PNG')
        for _ in range(num_instances)
    ]

  feature = {
      'image/encoded': _bytes([encoded_image]),
      'image/source_id': _bytes([DUMP_SOURCE_ID]),
      'image/height': _ints([image_height]),
      'image/width': _ints([image_width]),
      'image/object/bbox/xmin': _floats(xmins),
      'image/object/bbox/xmax': _floats(xmaxs),
      'image/object/bbox/ymin': _floats(ymins),
      'image/object/bbox/ymax': _floats(ymaxs),
      'image/object/class/label': _ints(class_ids),
      'image/object/class/text': _bytes(class_names),
      'image/object/is_crowd': _ints(crowd_flags),
      'image/object/area': _floats(areas),
      'image/object/mask': _bytes(encoded_masks),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
def create_segmentation_test_example(image_height: int, image_width: int,
                                     image_channel: int) -> tf.train.Example:
  """Builds a synthetic segmentation `tf.train.Example`.

  Both the image and the segmentation mask hold random pixel values; the mask
  is PNG-encoded and has no channel dimension.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.

  Returns:
    A tf.train.Example for testing.
  """

  def _bytes(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  def _int64(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

  encoded_image = make_image_bytes([image_height, image_width, image_channel])
  encoded_mask = make_image_bytes([image_height, image_width], fmt='PNG')

  feature = {
      'image/encoded': _bytes(encoded_image),
      'image/segmentation/class/encoded': _bytes(encoded_mask),
      'image/height': _int64(image_height),
      'image/width': _int64(image_width),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
...@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor: ...@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor:
A formatted source ID. A formatted source ID.
""" """
if source_id.dtype == tf.string: if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int64) source_id = tf.strings.to_number(source_id, tf.int64)
with tf.control_dependencies([source_id]): with tf.control_dependencies([source_id]):
source_id = tf.cond( source_id = tf.cond(
pred=tf.equal(tf.size(input=source_id), 0), pred=tf.equal(tf.size(input=source_id), 0),
......
...@@ -361,7 +361,7 @@ class Parser(parser.Parser): ...@@ -361,7 +361,7 @@ class Parser(parser.Parser):
audio = decoded_tensors[self._audio_feature] audio = decoded_tensors[self._audio_feature]
audio = tf.cast(audio, dtype=self._dtype) audio = tf.cast(audio, dtype=self._dtype)
audio = preprocess_ops_3d.sample_sequence( audio = preprocess_ops_3d.sample_sequence(
audio, 20, random=False, stride=1) audio, self._audio_shape[0], random=False, stride=1)
audio = tf.ensure_shape(audio, self._audio_shape) audio = tf.ensure_shape(audio, self._audio_shape)
features['audio'] = audio features['audio'] = audio
......
...@@ -212,6 +212,8 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None): ...@@ -212,6 +212,8 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
gt_annotations = [] gt_annotations = []
num_batches = len(groundtruths['source_id']) num_batches = len(groundtruths['source_id'])
for i in range(num_batches): for i in range(num_batches):
logging.info(
'convert_groundtruths_to_coco_dataset: Processing annotation %d', i)
max_num_instances = groundtruths['classes'][i].shape[1] max_num_instances = groundtruths['classes'][i].shape[1]
batch_size = groundtruths['source_id'][i].shape[0] batch_size = groundtruths['source_id'][i].shape[0]
for j in range(batch_size): for j in range(batch_size):
...@@ -259,6 +261,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None): ...@@ -259,6 +261,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
np_mask[np_mask > 0] = 255 np_mask[np_mask > 0] = 255
encoded_mask = mask_api.encode(np.asfortranarray(np_mask)) encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask ann['segmentation'] = encoded_mask
# Ensure the content of `counts` is JSON serializable string.
if 'counts' in ann['segmentation']:
ann['segmentation']['counts'] = six.ensure_str(
ann['segmentation']['counts'])
if 'areas' not in groundtruths: if 'areas' not in groundtruths:
ann['area'] = mask_api.area(encoded_mask) ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann) gt_annotations.append(ann)
...@@ -283,11 +289,13 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None): ...@@ -283,11 +289,13 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
class COCOGroundtruthGenerator: class COCOGroundtruthGenerator:
"""Generates the groundtruth annotations from a single example.""" """Generates the groundtruth annotations from a single example."""
def __init__(self, file_pattern, file_type, num_examples, include_mask): def __init__(self, file_pattern, file_type, num_examples, include_mask,
regenerate_source_id=False):
self._file_pattern = file_pattern self._file_pattern = file_pattern
self._num_examples = num_examples self._num_examples = num_examples
self._include_mask = include_mask self._include_mask = include_mask
self._dataset_fn = dataset_fn.pick_dataset_fn(file_type) self._dataset_fn = dataset_fn.pick_dataset_fn(file_type)
self._regenerate_source_id = regenerate_source_id
def _parse_single_example(self, example): def _parse_single_example(self, example):
"""Parses a single serialized tf.Example proto. """Parses a single serialized tf.Example proto.
...@@ -312,16 +320,21 @@ class COCOGroundtruthGenerator: ...@@ -312,16 +320,21 @@ class COCOGroundtruthGenerator:
mask of each instance. mask of each instance.
""" """
decoder = tf_example_decoder.TfExampleDecoder( decoder = tf_example_decoder.TfExampleDecoder(
include_mask=self._include_mask) include_mask=self._include_mask,
regenerate_source_id=self._regenerate_source_id)
decoded_tensors = decoder.decode(example) decoded_tensors = decoder.decode(example)
image = decoded_tensors['image'] image = decoded_tensors['image']
image_size = tf.shape(image)[0:2] image_size = tf.shape(image)[0:2]
boxes = box_ops.denormalize_boxes( boxes = box_ops.denormalize_boxes(
decoded_tensors['groundtruth_boxes'], image_size) decoded_tensors['groundtruth_boxes'], image_size)
source_id = decoded_tensors['source_id']
if source_id.dtype is tf.string:
source_id = tf.strings.to_number(source_id, out_type=tf.int64)
groundtruths = { groundtruths = {
'source_id': tf.strings.to_number( 'source_id': source_id,
decoded_tensors['source_id'], out_type=tf.int64),
'height': decoded_tensors['height'], 'height': decoded_tensors['height'],
'width': decoded_tensors['width'], 'width': decoded_tensors['width'],
'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0], 'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
...@@ -341,9 +354,10 @@ class COCOGroundtruthGenerator: ...@@ -341,9 +354,10 @@ class COCOGroundtruthGenerator:
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False) dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.interleave( dataset = dataset.interleave(
map_func=lambda filename: self._dataset_fn(filename).prefetch(1), map_func=lambda filename: self._dataset_fn(filename).prefetch(1),
cycle_length=12, cycle_length=None,
num_parallel_calls=tf.data.experimental.AUTOTUNE) num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.take(self._num_examples)
dataset = dataset.map(self._parse_single_example, dataset = dataset.map(self._parse_single_example,
num_parallel_calls=tf.data.experimental.AUTOTUNE) num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(1, drop_remainder=False) dataset = dataset.batch(1, drop_remainder=False)
...@@ -351,18 +365,18 @@ class COCOGroundtruthGenerator: ...@@ -351,18 +365,18 @@ class COCOGroundtruthGenerator:
return dataset return dataset
def __call__(self): def __call__(self):
for groundtruth_result in self._build_pipeline(): return self._build_pipeline()
yield groundtruth_result
def scan_and_generator_annotation_file(file_pattern: str, def scan_and_generator_annotation_file(file_pattern: str,
file_type: str, file_type: str,
num_samples: int, num_samples: int,
include_mask: bool, include_mask: bool,
annotation_file: str): annotation_file: str,
regenerate_source_id: bool = False):
"""Scans and generate the COCO-style annotation JSON file given a dataset.""" """Scans and generate the COCO-style annotation JSON file given a dataset."""
groundtruth_generator = COCOGroundtruthGenerator( groundtruth_generator = COCOGroundtruthGenerator(
file_pattern, file_type, num_samples, include_mask) file_pattern, file_type, num_samples, include_mask, regenerate_source_id)
generate_annotation_file(groundtruth_generator, annotation_file) generate_annotation_file(groundtruth_generator, annotation_file)
...@@ -371,7 +385,8 @@ def generate_annotation_file(groundtruth_generator, ...@@ -371,7 +385,8 @@ def generate_annotation_file(groundtruth_generator,
"""Generates COCO-style annotation JSON file given a groundtruth generator.""" """Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths = {} groundtruths = {}
logging.info('Loading groundtruth annotations from dataset to memory...') logging.info('Loading groundtruth annotations from dataset to memory...')
for groundtruth in groundtruth_generator(): for i, groundtruth in enumerate(groundtruth_generator()):
logging.info('generate_annotation_file: Processing annotation %d', i)
for k, v in six.iteritems(groundtruth): for k, v in six.iteritems(groundtruth):
if k not in groundtruths: if k not in groundtruths:
groundtruths[k] = [v] groundtruths[k] = [v]
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for coco_utils."""
import os
import tensorflow as tf
from official.vision.beta.dataloaders import tfexample_utils
from official.vision.beta.evaluation import coco_utils
class CocoUtilsTest(tf.test.TestCase):
  """Tests for the COCO annotation helpers in `coco_utils`."""

  def test_scan_and_generator_annotation_file(self):
    """Checks that scanning a TFRecord file produces the annotation file.

    Ten copies of one synthetic detection example are written to a TFRecord
    file, then `scan_and_generator_annotation_file` is asked to write the
    COCO-style JSON. Only the existence of the output file is verified.
    """
    num_samples = 10
    example = tfexample_utils.create_detection_test_example(
        image_height=512, image_width=512, image_channel=3, num_instances=10)
    tf_examples = [example] * num_samples
    data_file = os.path.join(self.create_tempdir(), 'test.tfrecord')
    tfexample_utils.dump_to_tfrecord(
        record_file=data_file, tf_examples=tf_examples)
    annotation_file = os.path.join(self.create_tempdir(), 'annotation.json')

    coco_utils.scan_and_generator_annotation_file(
        file_pattern=data_file,
        file_type='tfrecord',
        num_samples=num_samples,
        include_mask=True,
        annotation_file=annotation_file)

    # The message must be an f-string, otherwise the literal placeholder text
    # is reported instead of the actual path.
    self.assertTrue(
        tf.io.gfile.exists(annotation_file),
        msg=f'Annotation file {annotation_file} does not exist.')
if __name__ == '__main__':
tf.test.main()
...@@ -95,5 +95,21 @@ class MeanIoUTest(tf.test.TestCase): ...@@ -95,5 +95,21 @@ class MeanIoUTest(tf.test.TestCase):
expected_result = [0, 1 / (1 + 1 - 1)] expected_result = [0, 1 / (1 + 1 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3) self.assertAllClose(expected_result, result, atol=1e-3)
def test_update_state_annd_result(self):
  """Checks per-class IoU when each confusion-matrix cell has one sample."""
  labels = [0, 0, 1, 1]
  predictions = [0, 1, 0, 1]

  metric = iou.PerClassIoU(num_classes=2)
  metric.update_state(labels, predictions)
  result = metric.result()

  # Confusion matrix:
  #   cm = [[1, 1],
  #         [1, 1]]
  # so sum_row = [2, 2], sum_col = [2, 2] and true_positives = [1, 1].
  # iou = true_positives / (sum_row + sum_col - true_positives)
  iou_class_0 = 1 / (2 + 2 - 1)
  iou_class_1 = 1 / (2 + 2 - 1)
  self.assertAllClose([iou_class_0, iou_class_1], result, atol=1e-3)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -45,19 +45,25 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase): ...@@ -45,19 +45,25 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
dtype=np.uint16) dtype=np.uint16)
groundtruths = { groundtruths = {
'category_mask': tf.convert_to_tensor(category_mask), 'category_mask':
'instance_mask': tf.convert_to_tensor(groundtruth_instance_mask) tf.convert_to_tensor([category_mask]),
'instance_mask':
tf.convert_to_tensor([groundtruth_instance_mask]),
'image_info':
tf.convert_to_tensor([[[6, 6], [6, 6], [1.0, 1.0], [0, 0]]],
dtype=tf.float32)
} }
predictions = { predictions = {
'category_mask': tf.convert_to_tensor(category_mask), 'category_mask': tf.convert_to_tensor([category_mask]),
'instance_mask': tf.convert_to_tensor(good_det_instance_mask) 'instance_mask': tf.convert_to_tensor([good_det_instance_mask])
} }
pq_evaluator = panoptic_quality_evaluator.PanopticQualityEvaluator( pq_evaluator = panoptic_quality_evaluator.PanopticQualityEvaluator(
num_categories=1, num_categories=1,
ignored_label=2, ignored_label=2,
max_instances_per_category=16, max_instances_per_category=16,
offset=16) offset=16,
rescale_predictions=True)
for _ in range(2): for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions) pq_evaluator.update_state(groundtruths, predictions)
...@@ -70,7 +76,7 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase): ...@@ -70,7 +76,7 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
[1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1],
], ],
dtype=np.uint16) dtype=np.uint16)
predictions['instance_mask'] = tf.convert_to_tensor(bad_det_instance_mask) predictions['instance_mask'] = tf.convert_to_tensor([bad_det_instance_mask])
for _ in range(2): for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions) pq_evaluator.update_state(groundtruths, predictions)
......
...@@ -41,8 +41,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU): ...@@ -41,8 +41,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
dtype: data type of the metric result. dtype: data type of the metric result.
""" """
self._rescale_predictions = rescale_predictions self._rescale_predictions = rescale_predictions
super(MeanIoU, self).__init__( super().__init__(num_classes=num_classes, name=name, dtype=dtype)
num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred): def update_state(self, y_true, y_pred):
"""Updates metric state. """Updates metric state.
...@@ -120,9 +119,8 @@ class MeanIoU(tf.keras.metrics.MeanIoU): ...@@ -120,9 +119,8 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
flatten_masks = tf.reshape(masks, shape=[-1]) flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1]) flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super(MeanIoU, self).update_state( super().update_state(flatten_masks, flatten_predictions,
flatten_masks, flatten_predictions, tf.cast(flatten_valid_masks, tf.float32))
tf.cast(flatten_valid_masks, tf.float32))
class PerClassIoU(iou.PerClassIoU): class PerClassIoU(iou.PerClassIoU):
...@@ -148,8 +146,7 @@ class PerClassIoU(iou.PerClassIoU): ...@@ -148,8 +146,7 @@ class PerClassIoU(iou.PerClassIoU):
dtype: data type of the metric result. dtype: data type of the metric result.
""" """
self._rescale_predictions = rescale_predictions self._rescale_predictions = rescale_predictions
super(PerClassIoU, self).__init__( super().__init__(num_classes=num_classes, name=name, dtype=dtype)
num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred): def update_state(self, y_true, y_pred):
"""Updates metric state. """Updates metric state.
...@@ -213,9 +210,8 @@ class PerClassIoU(iou.PerClassIoU): ...@@ -213,9 +210,8 @@ class PerClassIoU(iou.PerClassIoU):
flatten_predictions = tf.reshape(predicted_mask, shape=[1, -1]) flatten_predictions = tf.reshape(predicted_mask, shape=[1, -1])
flatten_masks = tf.reshape(mask, shape=[1, -1]) flatten_masks = tf.reshape(mask, shape=[1, -1])
flatten_valid_masks = tf.reshape(valid_mask, shape=[1, -1]) flatten_valid_masks = tf.reshape(valid_mask, shape=[1, -1])
super(PerClassIoU, self).update_state( super().update_state(flatten_masks, flatten_predictions,
flatten_masks, flatten_predictions, tf.cast(flatten_valid_masks, tf.float32))
tf.cast(flatten_valid_masks, tf.float32))
else: else:
predictions = tf.image.resize( predictions = tf.image.resize(
...@@ -227,6 +223,5 @@ class PerClassIoU(iou.PerClassIoU): ...@@ -227,6 +223,5 @@ class PerClassIoU(iou.PerClassIoU):
flatten_masks = tf.reshape(masks, shape=[-1]) flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1]) flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super(PerClassIoU, self).update_state( super().update_state(flatten_masks, flatten_predictions,
flatten_masks, flatten_predictions, tf.cast(flatten_valid_masks, tf.float32))
tf.cast(flatten_valid_masks, tf.float32))
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for segmentation_metrics."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.evaluation import segmentation_metrics
class SegmentationMetricsTest(parameterized.TestCase, tf.test.TestCase):
  """Tests for the `MeanIoU` and `PerClassIoU` segmentation metrics."""

  def setUp(self):
    super().setUp()
    # Eager execution of tf.functions is a process-wide switch. Enable it for
    # every test and restore the previous value afterwards, so that no test
    # depends on execution order or leaks the setting to other tests.
    previously_eager = tf.config.functions_run_eagerly()
    tf.config.run_functions_eagerly(True)
    self.addCleanup(tf.config.run_functions_eagerly, previously_eager)

  def _create_test_data(self):
    """Builds one batch of two-class predictions and groundtruth.

    Returns:
      A `(y_pred, y_true)` tuple. `y_pred` is a uint16 array of shape
      [1, 3, 3, 2] with one channel per class. `y_true` is a dict holding a
      [1, 6, 6, 1] `masks` array, an all-ones `valid_masks` array of the same
      shape, and a float32 `image_info` array of shape [1, 4, 2].
    """
    y_pred_cls0 = np.expand_dims(
        np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=np.uint16),
        axis=(0, -1))
    y_pred_cls1 = np.expand_dims(
        np.array([[0, 0, 0], [0, 0, 1], [0, 0, 1]], dtype=np.uint16),
        axis=(0, -1))
    y_pred = np.concatenate((y_pred_cls0, y_pred_cls1), axis=-1)

    y_true = {
        'masks':
            np.expand_dims(
                np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
                          [0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 1, 1]],
                         dtype=np.uint16),
                axis=(0, -1)),
        'valid_masks':
            np.ones([1, 6, 6, 1], dtype=np.uint16),
        'image_info':
            np.array([[[6, 6], [3, 3], [0.5, 0.5], [0, 0]]], dtype=np.float32)
    }
    return y_pred, y_true

  @parameterized.parameters(True, False)
  def test_mean_iou_metric(self, rescale_predictions):
    """Checks the mean IoU with and without rescaling of predictions."""
    mean_iou_metric = segmentation_metrics.MeanIoU(
        num_classes=2, rescale_predictions=rescale_predictions)
    y_pred, y_true = self._create_test_data()
    # Disable autograph for correct coverage statistics.
    update_fn = tf.autograph.experimental.do_not_convert(
        mean_iou_metric.update_state)
    update_fn(y_true=y_true, y_pred=y_pred)
    miou = mean_iou_metric.result()
    self.assertAlmostEqual(miou.numpy(), 0.762, places=3)

  @parameterized.parameters(True, False)
  def test_per_class_mean_iou_metric(self, rescale_predictions):
    """Checks the per-class IoU with and without rescaling of predictions."""
    per_class_iou_metric = segmentation_metrics.PerClassIoU(
        num_classes=2, rescale_predictions=rescale_predictions)
    y_pred, y_true = self._create_test_data()
    # Disable autograph for correct coverage statistics.
    update_fn = tf.autograph.experimental.do_not_convert(
        per_class_iou_metric.update_state)
    update_fn(y_true=y_true, y_pred=y_pred)
    per_class_miou = per_class_iou_metric.result()
    self.assertAllClose(per_class_miou.numpy(), [0.857, 0.667], atol=1e-3)
if __name__ == '__main__':
tf.test.main()
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
# Import libraries # Import libraries
import tensorflow as tf import tensorflow as tf
from official.modeling import tf_utils
EPSILON = 1e-5 EPSILON = 1e-5
...@@ -87,3 +89,46 @@ class SegmentationLoss: ...@@ -87,3 +89,46 @@ class SegmentationLoss:
loss = tf.reduce_sum(top_k_losses) / normalizer loss = tf.reduce_sum(top_k_losses) / normalizer
return loss return loss
def get_actual_mask_scores(logits, labels, ignore_label):
  """Computes the per-class IoU between predicted and groundtruth labels.

  The predicted class of each pixel is the argmax of `logits`. Pixels whose
  groundtruth equals `ignore_label` are excluded from both intersection and
  union. A class whose union is empty gets a score of 0, because the
  denominator is clamped to `EPSILON`. No gradient flows through `logits`.

  Args:
    logits: A rank-4 tensor with a statically known
      [_, height, width, num_classes] shape.
    labels: Groundtruth label map, resized with nearest-neighbor to the
      spatial size of `logits` (presumably [batch, H, W, 1]; confirm against
      callers).
    ignore_label: Label value marking pixels to leave out of the score.

  Returns:
    A float32 tensor of shape [batch_size, num_classes] with the IoU scores.
  """
  _, height, width, num_classes = logits.get_shape().as_list()
  batch_size = tf.shape(logits)[0]
  flat_shape = [batch_size, -1]

  # Scores are regression targets; do not backpropagate into the logits.
  logits = tf.stop_gradient(logits)
  labels = tf.image.resize(
      labels, (height, width),
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

  flat_predictions = tf.reshape(
      tf.argmax(logits, -1, output_type=tf.int32), flat_shape)
  flat_labels = tf.cast(tf.reshape(labels, flat_shape), tf.int32)

  one_hot_predictions = tf.one_hot(
      flat_predictions, num_classes, on_value=True, off_value=False)
  one_hot_labels = tf.one_hot(
      flat_labels, num_classes, on_value=True, off_value=False)
  # Shape [batch, pixels, 1] so it broadcasts across the class axis.
  keep_mask = tf.expand_dims(tf.not_equal(flat_labels, ignore_label), 2)

  def _count_kept(class_mask):
    """Counts, per image and class, the non-ignored pixels set in the mask."""
    kept = tf.logical_and(class_mask, keep_mask)
    return tf.reduce_sum(tf.cast(kept, tf.float32), axis=1)

  overlap = _count_kept(tf.logical_and(one_hot_predictions, one_hot_labels))
  union = _count_kept(tf.logical_or(one_hot_predictions, one_hot_labels))
  return tf.divide(overlap, tf.maximum(union, EPSILON))
class MaskScoringLoss:
  """Mean squared error between predicted and actual mask scores."""

  def __init__(self, ignore_label):
    """Initializes the loss.

    Args:
      ignore_label: Label value excluded when the actual scores are computed.
    """
    # Keep the unreduced losses; the mean is taken in `__call__`.
    self._mse_loss = tf.keras.losses.MeanSquaredError(
        reduction=tf.keras.losses.Reduction.NONE)
    self._ignore_label = ignore_label

  def __call__(self, predicted_scores, logits, labels):
    """Computes the loss.

    Args:
      predicted_scores: Scores predicted by the model, compared against the
        scores computed from `logits` and `labels`.
      logits: Segmentation logits passed to `get_actual_mask_scores`.
      labels: Groundtruth labels passed to `get_actual_mask_scores`.

    Returns:
      The result of `tf_utils.safe_mean` over the unreduced squared errors.
    """
    target_scores = get_actual_mask_scores(
        logits, labels, self._ignore_label)
    unreduced_loss = self._mse_loss(target_scores, predicted_scores)
    return tf_utils.safe_mean(unreduced_loss)
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
"""Backbones package definition.""" """Backbones package definition."""
from official.vision.beta.modeling.backbones.efficientnet import EfficientNet from official.vision.beta.modeling.backbones.efficientnet import EfficientNet
from official.vision.beta.modeling.backbones.mobiledet import MobileDet
from official.vision.beta.modeling.backbones.mobilenet import MobileNet from official.vision.beta.modeling.backbones.mobilenet import MobileNet
from official.vision.beta.modeling.backbones.resnet import ResNet from official.vision.beta.modeling.backbones.resnet import ResNet
from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
......
...@@ -189,6 +189,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -189,6 +189,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
norm_momentum=0.99, norm_momentum=0.99,
norm_epsilon=1e-5) norm_epsilon=1e-5)
@combinations.generate(
    combinations.combine(
        model_id=[
            'MobileDetCPU',
            'MobileDetDSP',
            'MobileDetEdgeTPU',
            'MobileDetGPU',
        ],
        filter_size_scale=[1.0, 0.75],
    ))
def test_mobiledet_creation(self, model_id, filter_size_scale):
  """Checks the factory builds the same MobileDet as direct construction."""
  norm_momentum = 0.99
  norm_epsilon = 1e-5

  # Reference network, constructed directly.
  network = backbones.MobileDet(
      model_id=model_id,
      filter_size_scale=filter_size_scale,
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon)

  # The same network, described through configs and built by the factory.
  mobiledet_config = backbones_cfg.MobileDet(
      model_id=model_id, filter_size_scale=filter_size_scale)
  backbone_config = backbones_cfg.Backbone(
      type='mobiledet', mobiledet=mobiledet_config)
  norm_activation_config = common_cfg.NormActivation(
      norm_momentum=norm_momentum,
      norm_epsilon=norm_epsilon,
      use_sync_bn=False)
  factory_network = factory.build_backbone(
      input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
      backbone_config=backbone_config,
      norm_activation_config=norm_activation_config)

  self.assertEqual(network.get_config(), factory_network.get_config())
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment