Unverified Commit 09d9656f authored by Srihari Humbarwadi's avatar Srihari Humbarwadi Committed by GitHub
Browse files

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
......@@ -76,6 +76,16 @@ class SegmentationHead(hyperparams.Config):
decoder_max_level: Optional[Union[int, str]] = None
@dataclasses.dataclass
class MaskScoringHead(hyperparams.Config):
"""Mask Scoring head config."""
num_convs: int = 4
num_filters: int = 128
fc_input_size: List[int] = dataclasses.field(default_factory=list)
num_fcs: int = 2
fc_dims: int = 1024
@dataclasses.dataclass
class SemanticSegmentationModel(hyperparams.Config):
"""Semantic segmentation model config."""
......@@ -87,6 +97,7 @@ class SemanticSegmentationModel(hyperparams.Config):
backbone: backbones.Backbone = backbones.Backbone(
type='resnet', resnet=backbones.ResNet())
decoder: decoders.Decoder = decoders.Decoder(type='identity')
mask_scoring_head: Optional[MaskScoringHead] = None
norm_activation: common.NormActivation = common.NormActivation()
......
#!/bin/bash
sudo apt update
sudo apt install unzip aria2 -y
DATA_DIR=$1
aria2c -j 8 -Z \
http://images.cocodataset.org/annotations/annotations_trainval2017.zip \
http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip \
http://images.cocodataset.org/zips/train2017.zip \
http://images.cocodataset.org/zips/val2017.zip \
--dir=$DATA_DIR;
unzip $DATA_DIR/"*".zip -d $DATA_DIR;
mkdir $DATA_DIR/zips && mv $DATA_DIR/*.zip $DATA_DIR/zips;
unzip $DATA_DIR/annotations/panoptic_train2017.zip -d $DATA_DIR
unzip $DATA_DIR/annotations/panoptic_val2017.zip -d $DATA_DIR
python3 official/vision/beta/data/create_coco_tf_record.py \
--logtostderr \
--image_dir="$DATA_DIR/val2017" \
--object_annotations_file="$DATA_DIR/annotations/instances_val2017.json" \
--output_file_prefix="$DATA_DIR/tfrecords/val" \
--panoptic_annotations_file="$DATA_DIR/annotations/panoptic_val2017.json" \
--panoptic_masks_dir="$DATA_DIR/panoptic_val2017" \
--num_shards=8 \
--include_masks \
--include_panoptic_masks
python3 official/vision/beta/data/create_coco_tf_record.py \
--logtostderr \
--image_dir="$DATA_DIR/train2017" \
--object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \
--output_file_prefix="$DATA_DIR/tfrecords/train" \
--panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \
--panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \
--num_shards=32 \
--include_masks \
--include_panoptic_masks
......@@ -55,7 +55,7 @@ class Parser(object):
is_training: a `bool` to indicate whether it is in training mode.
Returns:
parse: a `callable` that takes the serialized examle and generate the
parse: a `callable` that takes the serialized example and generate the
images, labels tuple where labels is a dict of Tensors that contains
labels.
"""
......
......@@ -19,11 +19,13 @@ into (image, labels) tuple for RetinaNet.
"""
# Import libraries
from absl import logging
import tensorflow as tf
from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import utils
from official.vision.beta.ops import anchor
from official.vision.beta.ops import augment
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
......@@ -40,6 +42,7 @@ class Parser(parser.Parser):
anchor_size,
match_threshold=0.5,
unmatched_threshold=0.5,
aug_type=None,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
......@@ -71,6 +74,8 @@ class Parser(parser.Parser):
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
aug_type: An optional Augmentation object to choose from AutoAugment and
RandAugment. The latter is not supported, and will raise ValueError.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
......@@ -108,7 +113,20 @@ class Parser(parser.Parser):
self._aug_scale_min = aug_scale_min
self._aug_scale_max = aug_scale_max
# Data Augmentation with AutoAugment.
# Data augmentation with AutoAugment or RandAugment.
self._augmenter = None
if aug_type is not None:
if aug_type.type == 'autoaug':
logging.info('Using AutoAugment.')
self._augmenter = augment.AutoAugment(
augmentation_name=aug_type.autoaug.augmentation_name,
cutout_const=aug_type.autoaug.cutout_const,
translate_const=aug_type.autoaug.translate_const)
else:
# TODO(b/205346436) Support RandAugment.
raise ValueError(f'Augmentation policy {aug_type.type} not supported.')
# Deprecated. Data Augmentation with AutoAugment.
self._use_autoaugment = use_autoaugment
self._autoaugment_policy_name = autoaugment_policy_name
......@@ -138,9 +156,13 @@ class Parser(parser.Parser):
for k, v in attributes.items():
attributes[k] = tf.gather(v, indices)
# Gets original image and its size.
# Gets original image.
image = data['image']
# Apply autoaug or randaug.
if self._augmenter is not None:
image, boxes = self._augmenter.distort_with_boxes(image, boxes)
image_shape = tf.shape(input=image)[0:2]
# Normalizes image with mean and std pixel values.
......
......@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder
def _generate_source_id(image_bytes):
# Hashing using 22 bits since float32 has only 23 mantissa bits.
return tf.strings.as_string(
tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 63 - 1))
tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 22 - 1))
class TfExampleDecoder(decoder.Decoder):
......
......@@ -14,24 +14,13 @@
"""Tests for tf_example_decoder.py."""
import io
# Import libraries
from absl.testing import parameterized
import numpy as np
from PIL import Image
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder
DUMP_SOURCE_ID = b'123'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
from official.vision.beta.dataloaders import tfexample_utils
class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
......@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=True, regenerate_source_id=regenerate_source_id)
image = _encode_image(
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt='JPEG')
if num_instances == 0:
xmins = []
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
serialized_example = tfexample_utils.create_detection_test_example(
image_height=image_height,
image_width=image_width,
image_channel=3,
num_instances=num_instances).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
......@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
if not regenerate_source_id:
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
......@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
......@@ -172,45 +99,37 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
'image/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
'image/source_id': (tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[tfexample_utils.DUMP_SOURCE_ID]))),
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
'image/object/bbox/xmin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
'image/object/bbox/xmax': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
'image/object/bbox/ymax': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
'image/object/class/label': (tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
'image/object/is_crowd': (tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
'image/object/area': (tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
'image/object/mask': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
......@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
......@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
......@@ -276,39 +195,33 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
'image/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
'image/source_id': (tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[tfexample_utils.DUMP_SOURCE_ID]))),
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
'image/object/bbox/xmin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
'image/object/bbox/xmax': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
'image/object/bbox/ymax': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
'image/object/class/label': (tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/mask': (
tf.train.Feature(
'image/object/mask': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
......@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
......
......@@ -14,28 +14,19 @@
"""Tests for tf_example_label_map_decoder.py."""
import io
import os
# Import libraries
from absl.testing import parameterized
import numpy as np
from PIL import Image
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_label_map_decoder
from official.vision.beta.dataloaders import tfexample_utils
DUMP_SOURCE_ID = b'123'
LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
......@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
label_map_path, include_mask=True)
image = _encode_image(
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt='JPEG')
if num_instances == 0:
xmins = []
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
labels = [b'class_1'] * num_instances
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
serialized_example = tfexample_utils.create_detection_test_example(
image_height=image_height,
image_width=image_width,
image_channel=3,
num_instances=num_instances).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
......@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
......@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
......@@ -183,45 +111,37 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
'image/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
'image/source_id': (tf.train.Feature(
bytes_list=tf.train.BytesList(
value=[tfexample_utils.DUMP_SOURCE_ID]))),
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
'image/object/bbox/xmin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
'image/object/bbox/xmax': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
'image/object/bbox/ymax': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
'image/object/class/text': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
'image/object/is_crowd': (tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
'image/object/area': (tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
'image/object/mask': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
......@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
......
......@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory
class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def _create_test_example(self):
serialized_example = {
'image': tf.ones(shape=(100, 100, 3), dtype=tf.uint8),
'label': 1,
'image/id': 0,
'objects': {
'label': 1,
'is_crowd': 0,
'area': 0.5,
'bbox': [0.1, 0.2, 0.3, 0.4]
},
'segmentation_label': tf.ones((100, 100, 1), dtype=tf.uint8),
'image_left': tf.ones(shape=(100, 100, 3), dtype=tf.uint8)
}
return serialized_example
@parameterized.parameters(
('imagenet2012'),
('cifar10'),
......@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_classification_decoder(self, tfds_name):
decoder = tfds_factory.get_classification_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 2)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/class/label', decoded_tensor)
@parameterized.parameters(
('flowers'),
......@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_detection_decoder(self, tfds_name):
decoder = tfds_factory.get_detection_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 8)
self.assertIn('image', decoded_tensor)
self.assertIn('source_id', decoded_tensor)
self.assertIn('height', decoded_tensor)
self.assertIn('width', decoded_tensor)
self.assertIn('groundtruth_classes', decoded_tensor)
self.assertIn('groundtruth_is_crowd', decoded_tensor)
self.assertIn('groundtruth_area', decoded_tensor)
self.assertIn('groundtruth_boxes', decoded_tensor)
@parameterized.parameters(
('pascal'),
......@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_segmentation_decoder(self, tfds_name):
decoder = tfds_factory.get_segmentation_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 4)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/segmentation/class/encoded', decoded_tensor)
self.assertIn('image/height', decoded_tensor)
self.assertIn('image/width', decoded_tensor)
@parameterized.parameters(
('coco'),
......
......@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded'
CLASSIFICATION_LABEL_KEY = 'image/class/label'
LABEL_KEY = 'clip/label/index'
AUDIO_KEY = 'features/audio'
DUMP_SOURCE_ID = b'123'
def make_image_bytes(shape: Sequence[int]):
"""Generates image and return bytes in JPEG format."""
def encode_image(image_array: np.array, fmt: str) -> bytes:
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
def make_image_bytes(shape: Sequence[int], fmt: str = 'JPEG') -> bytes:
"""Generates image and return bytes in specified format."""
random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8)
random_image = Image.fromarray(random_image)
with io.BytesIO() as buffer:
random_image.save(buffer, format='JPEG')
raw_image_bytes = buffer.getvalue()
return raw_image_bytes
return encode_image(random_image, fmt=fmt)
def put_int64_to_context(seq_example: tf.train.SequenceExample,
......@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int,
bytes_list=tf.train.BytesList(value=[labels.tobytes()])))
}
return tf.train.Example(features=tf.train.Features(feature=feature))
def create_detection_test_example(image_height: int, image_width: int,
image_channel: int,
num_instances: int) -> tf.train.Example:
"""Creates and returns a test example containing box and mask annotations.
Args:
image_height: The height of test image.
image_width: The width of test image.
image_channel: The channel of test image.
num_instances: The number of object instances per image.
Returns:
A tf.train.Example for testing.
"""
image = make_image_bytes([image_height, image_width, image_channel])
if num_instances == 0:
xmins = []
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
labels_text = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels_text = [b'class_1'] * num_instances
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
for _ in range(num_instances):
mask = make_image_bytes([image_height, image_width], fmt='PNG')
masks.append(mask)
return tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/class/text': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels_text))),
'image/object/is_crowd': (tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
}))
def create_segmentation_test_example(image_height: int, image_width: int,
image_channel: int) -> tf.train.Example:
"""Creates and returns a test example containing mask annotations.
Args:
image_height: The height of test image.
image_width: The width of test image.
image_channel: The channel of test image.
Returns:
A tf.train.Example for testing.
"""
image = make_image_bytes([image_height, image_width, image_channel])
mask = make_image_bytes([image_height, image_width], fmt='PNG')
return tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/segmentation/class/encoded': (tf.train.Feature(
bytes_list=tf.train.BytesList(value=[mask]))),
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width])))
}))
......@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor:
A formatted source ID.
"""
if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)
source_id = tf.strings.to_number(source_id, tf.int64)
with tf.control_dependencies([source_id]):
source_id = tf.cond(
pred=tf.equal(tf.size(input=source_id), 0),
......
......@@ -361,7 +361,7 @@ class Parser(parser.Parser):
audio = decoded_tensors[self._audio_feature]
audio = tf.cast(audio, dtype=self._dtype)
audio = preprocess_ops_3d.sample_sequence(
audio, 20, random=False, stride=1)
audio, self._audio_shape[0], random=False, stride=1)
audio = tf.ensure_shape(audio, self._audio_shape)
features['audio'] = audio
......
......@@ -212,6 +212,8 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
gt_annotations = []
num_batches = len(groundtruths['source_id'])
for i in range(num_batches):
logging.info(
'convert_groundtruths_to_coco_dataset: Processing annotation %d', i)
max_num_instances = groundtruths['classes'][i].shape[1]
batch_size = groundtruths['source_id'][i].shape[0]
for j in range(batch_size):
......@@ -259,6 +261,10 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
np_mask[np_mask > 0] = 255
encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask
# Ensure the content of `counts` is JSON serializable string.
if 'counts' in ann['segmentation']:
ann['segmentation']['counts'] = six.ensure_str(
ann['segmentation']['counts'])
if 'areas' not in groundtruths:
ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann)
......@@ -283,11 +289,13 @@ def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
class COCOGroundtruthGenerator:
"""Generates the groundtruth annotations from a single example."""
def __init__(self, file_pattern, file_type, num_examples, include_mask):
def __init__(self, file_pattern, file_type, num_examples, include_mask,
regenerate_source_id=False):
self._file_pattern = file_pattern
self._num_examples = num_examples
self._include_mask = include_mask
self._dataset_fn = dataset_fn.pick_dataset_fn(file_type)
self._regenerate_source_id = regenerate_source_id
def _parse_single_example(self, example):
"""Parses a single serialized tf.Example proto.
......@@ -312,16 +320,21 @@ class COCOGroundtruthGenerator:
mask of each instance.
"""
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=self._include_mask)
include_mask=self._include_mask,
regenerate_source_id=self._regenerate_source_id)
decoded_tensors = decoder.decode(example)
image = decoded_tensors['image']
image_size = tf.shape(image)[0:2]
boxes = box_ops.denormalize_boxes(
decoded_tensors['groundtruth_boxes'], image_size)
source_id = decoded_tensors['source_id']
if source_id.dtype is tf.string:
source_id = tf.strings.to_number(source_id, out_type=tf.int64)
groundtruths = {
'source_id': tf.strings.to_number(
decoded_tensors['source_id'], out_type=tf.int64),
'source_id': source_id,
'height': decoded_tensors['height'],
'width': decoded_tensors['width'],
'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
......@@ -341,9 +354,10 @@ class COCOGroundtruthGenerator:
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.interleave(
map_func=lambda filename: self._dataset_fn(filename).prefetch(1),
cycle_length=12,
cycle_length=None,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.take(self._num_examples)
dataset = dataset.map(self._parse_single_example,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(1, drop_remainder=False)
......@@ -351,18 +365,18 @@ class COCOGroundtruthGenerator:
return dataset
def __call__(self):
for groundtruth_result in self._build_pipeline():
yield groundtruth_result
return self._build_pipeline()
def scan_and_generator_annotation_file(file_pattern: str,
file_type: str,
num_samples: int,
include_mask: bool,
annotation_file: str):
annotation_file: str,
regenerate_source_id: bool = False):
"""Scans and generate the COCO-style annotation JSON file given a dataset."""
groundtruth_generator = COCOGroundtruthGenerator(
file_pattern, file_type, num_samples, include_mask)
file_pattern, file_type, num_samples, include_mask, regenerate_source_id)
generate_annotation_file(groundtruth_generator, annotation_file)
......@@ -371,7 +385,8 @@ def generate_annotation_file(groundtruth_generator,
"""Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths = {}
logging.info('Loading groundtruth annotations from dataset to memory...')
for groundtruth in groundtruth_generator():
for i, groundtruth in enumerate(groundtruth_generator()):
logging.info('generate_annotation_file: Processing annotation %d', i)
for k, v in six.iteritems(groundtruth):
if k not in groundtruths:
groundtruths[k] = [v]
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for coco_utils."""
import os
import tensorflow as tf
from official.vision.beta.dataloaders import tfexample_utils
from official.vision.beta.evaluation import coco_utils
class CocoUtilsTest(tf.test.TestCase):
def test_scan_and_generator_annotation_file(self):
num_samples = 10
example = tfexample_utils.create_detection_test_example(
image_height=512, image_width=512, image_channel=3, num_instances=10)
tf_examples = [example] * num_samples
data_file = os.path.join(self.create_tempdir(), 'test.tfrecord')
tfexample_utils.dump_to_tfrecord(
record_file=data_file, tf_examples=tf_examples)
annotation_file = os.path.join(self.create_tempdir(), 'annotation.json')
coco_utils.scan_and_generator_annotation_file(
file_pattern=data_file,
file_type='tfrecord',
num_samples=num_samples,
include_mask=True,
annotation_file=annotation_file)
self.assertTrue(
tf.io.gfile.exists(annotation_file),
msg='Annotation file {annotation_file} does not exists.')
if __name__ == '__main__':
tf.test.main()
......@@ -95,5 +95,21 @@ class MeanIoUTest(tf.test.TestCase):
expected_result = [0, 1 / (1 + 1 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3)
def test_update_state_annd_result(self):
y_pred = [0, 1, 0, 1]
y_true = [0, 0, 1, 1]
m_obj = iou.PerClassIoU(num_classes=2)
m_obj.update_state(y_true, y_pred)
result = m_obj.result()
# cm = [[1, 1],
# [1, 1]]
# sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [1 / (2 + 2 - 1), 1 / (2 + 2 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3)
if __name__ == '__main__':
tf.test.main()
......@@ -45,19 +45,25 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
dtype=np.uint16)
groundtruths = {
'category_mask': tf.convert_to_tensor(category_mask),
'instance_mask': tf.convert_to_tensor(groundtruth_instance_mask)
'category_mask':
tf.convert_to_tensor([category_mask]),
'instance_mask':
tf.convert_to_tensor([groundtruth_instance_mask]),
'image_info':
tf.convert_to_tensor([[[6, 6], [6, 6], [1.0, 1.0], [0, 0]]],
dtype=tf.float32)
}
predictions = {
'category_mask': tf.convert_to_tensor(category_mask),
'instance_mask': tf.convert_to_tensor(good_det_instance_mask)
'category_mask': tf.convert_to_tensor([category_mask]),
'instance_mask': tf.convert_to_tensor([good_det_instance_mask])
}
pq_evaluator = panoptic_quality_evaluator.PanopticQualityEvaluator(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
offset=16,
rescale_predictions=True)
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
......@@ -70,7 +76,7 @@ class PanopticQualityEvaluatorTest(tf.test.TestCase):
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
predictions['instance_mask'] = tf.convert_to_tensor(bad_det_instance_mask)
predictions['instance_mask'] = tf.convert_to_tensor([bad_det_instance_mask])
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
......
......@@ -41,8 +41,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
dtype: data type of the metric result.
"""
self._rescale_predictions = rescale_predictions
super(MeanIoU, self).__init__(
num_classes=num_classes, name=name, dtype=dtype)
super().__init__(num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred):
"""Updates metric state.
......@@ -120,8 +119,7 @@ class MeanIoU(tf.keras.metrics.MeanIoU):
flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super(MeanIoU, self).update_state(
flatten_masks, flatten_predictions,
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
......@@ -148,8 +146,7 @@ class PerClassIoU(iou.PerClassIoU):
dtype: data type of the metric result.
"""
self._rescale_predictions = rescale_predictions
super(PerClassIoU, self).__init__(
num_classes=num_classes, name=name, dtype=dtype)
super().__init__(num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred):
"""Updates metric state.
......@@ -213,8 +210,7 @@ class PerClassIoU(iou.PerClassIoU):
flatten_predictions = tf.reshape(predicted_mask, shape=[1, -1])
flatten_masks = tf.reshape(mask, shape=[1, -1])
flatten_valid_masks = tf.reshape(valid_mask, shape=[1, -1])
super(PerClassIoU, self).update_state(
flatten_masks, flatten_predictions,
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
else:
......@@ -227,6 +223,5 @@ class PerClassIoU(iou.PerClassIoU):
flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super(PerClassIoU, self).update_state(
flatten_masks, flatten_predictions,
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for segmentation_metrics."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.beta.evaluation import segmentation_metrics
class SegmentationMetricsTest(parameterized.TestCase, tf.test.TestCase):
def _create_test_data(self):
y_pred_cls0 = np.expand_dims(
np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=np.uint16),
axis=(0, -1))
y_pred_cls1 = np.expand_dims(
np.array([[0, 0, 0], [0, 0, 1], [0, 0, 1]], dtype=np.uint16),
axis=(0, -1))
y_pred = np.concatenate((y_pred_cls0, y_pred_cls1), axis=-1)
y_true = {
'masks':
np.expand_dims(
np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 1, 1]],
dtype=np.uint16),
axis=(0, -1)),
'valid_masks':
np.ones([1, 6, 6, 1], dtype=np.uint16),
'image_info':
np.array([[[6, 6], [3, 3], [0.5, 0.5], [0, 0]]], dtype=np.float32)
}
return y_pred, y_true
@parameterized.parameters(True, False)
def test_mean_iou_metric(self, rescale_predictions):
tf.config.experimental_run_functions_eagerly(True)
mean_iou_metric = segmentation_metrics.MeanIoU(
num_classes=2, rescale_predictions=rescale_predictions)
y_pred, y_true = self._create_test_data()
# Disable autograph for correct coverage statistics.
update_fn = tf.autograph.experimental.do_not_convert(
mean_iou_metric.update_state)
update_fn(y_true=y_true, y_pred=y_pred)
miou = mean_iou_metric.result()
self.assertAlmostEqual(miou.numpy(), 0.762, places=3)
@parameterized.parameters(True, False)
def test_per_class_mean_iou_metric(self, rescale_predictions):
per_class_iou_metric = segmentation_metrics.PerClassIoU(
num_classes=2, rescale_predictions=rescale_predictions)
y_pred, y_true = self._create_test_data()
# Disable autograph for correct coverage statistics.
update_fn = tf.autograph.experimental.do_not_convert(
per_class_iou_metric.update_state)
update_fn(y_true=y_true, y_pred=y_pred)
per_class_miou = per_class_iou_metric.result()
self.assertAllClose(per_class_miou.numpy(), [0.857, 0.667], atol=1e-3)
if __name__ == '__main__':
tf.test.main()
......@@ -17,6 +17,8 @@
# Import libraries
import tensorflow as tf
from official.modeling import tf_utils
EPSILON = 1e-5
......@@ -87,3 +89,46 @@ class SegmentationLoss:
loss = tf.reduce_sum(top_k_losses) / normalizer
return loss
def get_actual_mask_scores(logits, labels, ignore_label):
"""Gets actual mask scores."""
_, height, width, num_classes = logits.get_shape().as_list()
batch_size = tf.shape(logits)[0]
logits = tf.stop_gradient(logits)
labels = tf.image.resize(
labels, (height, width),
method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
predicted_labels = tf.argmax(logits, -1, output_type=tf.int32)
flat_predictions = tf.reshape(predicted_labels, [batch_size, -1])
flat_labels = tf.cast(tf.reshape(labels, [batch_size, -1]), tf.int32)
one_hot_predictions = tf.one_hot(
flat_predictions, num_classes, on_value=True, off_value=False)
one_hot_labels = tf.one_hot(
flat_labels, num_classes, on_value=True, off_value=False)
keep_mask = tf.not_equal(flat_labels, ignore_label)
keep_mask = tf.expand_dims(keep_mask, 2)
overlap = tf.logical_and(one_hot_predictions, one_hot_labels)
overlap = tf.logical_and(overlap, keep_mask)
overlap = tf.reduce_sum(tf.cast(overlap, tf.float32), axis=1)
union = tf.logical_or(one_hot_predictions, one_hot_labels)
union = tf.logical_and(union, keep_mask)
union = tf.reduce_sum(tf.cast(union, tf.float32), axis=1)
actual_scores = tf.divide(overlap, tf.maximum(union, EPSILON))
return actual_scores
class MaskScoringLoss:
"""Mask Scoring loss."""
def __init__(self, ignore_label):
self._ignore_label = ignore_label
self._mse_loss = tf.keras.losses.MeanSquaredError(
reduction=tf.keras.losses.Reduction.NONE)
def __call__(self, predicted_scores, logits, labels):
actual_scores = get_actual_mask_scores(logits, labels, self._ignore_label)
loss = tf_utils.safe_mean(self._mse_loss(actual_scores, predicted_scores))
return loss
......@@ -16,6 +16,7 @@
"""Backbones package definition."""
from official.vision.beta.modeling.backbones.efficientnet import EfficientNet
from official.vision.beta.modeling.backbones.mobiledet import MobileDet
from official.vision.beta.modeling.backbones.mobilenet import MobileNet
from official.vision.beta.modeling.backbones.resnet import ResNet
from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
......
......@@ -189,6 +189,40 @@ class FactoryTest(tf.test.TestCase, parameterized.TestCase):
norm_momentum=0.99,
norm_epsilon=1e-5)
@combinations.generate(
combinations.combine(
model_id=[
'MobileDetCPU',
'MobileDetDSP',
'MobileDetEdgeTPU',
'MobileDetGPU'],
filter_size_scale=[1.0, 0.75],
))
def test_mobiledet_creation(self, model_id, filter_size_scale):
"""Test creation of Mobiledet models."""
network = backbones.MobileDet(
model_id=model_id,
filter_size_scale=filter_size_scale,
norm_momentum=0.99,
norm_epsilon=1e-5)
backbone_config = backbones_cfg.Backbone(
type='mobiledet',
mobiledet=backbones_cfg.MobileDet(
model_id=model_id, filter_size_scale=filter_size_scale))
norm_activation_config = common_cfg.NormActivation(
norm_momentum=0.99, norm_epsilon=1e-5, use_sync_bn=False)
factory_network = factory.build_backbone(
input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
backbone_config=backbone_config,
norm_activation_config=norm_activation_config)
network_config = network.get_config()
factory_network_config = factory_network.get_config()
self.assertEqual(network_config, factory_network_config)
if __name__ == '__main__':
tf.test.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment