Commit c57e975a authored by saberkun's avatar saberkun
Browse files

Merge pull request #10338 from srihari-humbarwadi:readme

PiperOrigin-RevId: 413033276
parents 7fb4f3cd acf4156e
...@@ -19,9 +19,6 @@ import os ...@@ -19,9 +19,6 @@ import os
from absl.testing import parameterized from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
from official.common import dataset_fn
from official.core import config_definitions as cfg
from official.core import input_reader
from official.projects.volumetric_models.dataloaders import segmentation_input_3d from official.projects.volumetric_models.dataloaders import segmentation_input_3d
from official.vision.beta.dataloaders import tfexample_utils from official.vision.beta.dataloaders import tfexample_utils
...@@ -33,19 +30,15 @@ class InputReaderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -33,19 +30,15 @@ class InputReaderTest(parameterized.TestCase, tf.test.TestCase):
data_dir = os.path.join(self.get_temp_dir(), 'data') data_dir = os.path.join(self.get_temp_dir(), 'data')
tf.io.gfile.makedirs(data_dir) tf.io.gfile.makedirs(data_dir)
self._data_path = os.path.join(data_dir, 'data.tfrecord') self._data_path = os.path.join(data_dir, 'data.tfrecord')
# pylint: disable=g-complex-comprehension self._example = tfexample_utils.create_3d_image_test_example(
examples = [ image_height=32, image_width=32, image_volume=32, image_channel=2)
tfexample_utils.create_3d_image_test_example(
image_height=32, image_width=32, image_volume=32, image_channel=2)
for _ in range(20)
]
# pylint: enable=g-complex-comprehension
tfexample_utils.dump_to_tfrecord(self._data_path, tf_examples=examples)
@parameterized.parameters(([32, 32, 32], 2, 2)) @parameterized.parameters(
def testSegmentationInputReader(self, input_size, num_classes, num_channels): ([32, 32, 32], 2, 2, False),
params = cfg.DataConfig( ([32, 32, 32], 2, 2, True),
input_path=self._data_path, global_batch_size=2, is_training=False) )
def testSegmentationInputReader(self, input_size, num_classes, num_channels,
is_training):
decoder = segmentation_input_3d.Decoder() decoder = segmentation_input_3d.Decoder()
parser = segmentation_input_3d.Parser( parser = segmentation_input_3d.Parser(
...@@ -53,23 +46,16 @@ class InputReaderTest(parameterized.TestCase, tf.test.TestCase): ...@@ -53,23 +46,16 @@ class InputReaderTest(parameterized.TestCase, tf.test.TestCase):
num_classes=num_classes, num_classes=num_classes,
num_channels=num_channels) num_channels=num_channels)
reader = input_reader.InputReader( decoded_tensor = decoder.decode(self._example.SerializeToString())
params, image, labels = parser.parse_fn(is_training=is_training)(decoded_tensor)
dataset_fn=dataset_fn.pick_dataset_fn('tfrecord'),
decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read()
iterator = iter(dataset)
image, labels = next(iterator)
# Checks image shape. # Checks image shape.
self.assertEqual( self.assertEqual(
list(image.numpy().shape), list(image.numpy().shape),
[2, input_size[0], input_size[1], input_size[2], num_channels]) [input_size[0], input_size[1], input_size[2], num_channels])
self.assertEqual( self.assertEqual(
list(labels.numpy().shape), list(labels.numpy().shape),
[2, input_size[0], input_size[1], input_size[2], num_classes]) [input_size[0], input_size[1], input_size[2], num_classes])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
"""Segmentation heads.""" """Segmentation heads."""
from typing import Any, Union, Sequence, Mapping from typing import Any, Union, Sequence, Mapping, Tuple
import tensorflow as tf import tensorflow as tf
from official.modeling import tf_utils from official.modeling import tf_utils
...@@ -139,25 +139,29 @@ class SegmentationHead3D(tf.keras.layers.Layer): ...@@ -139,25 +139,29 @@ class SegmentationHead3D(tf.keras.layers.Layer):
super(SegmentationHead3D, self).build(input_shape) super(SegmentationHead3D, self).build(input_shape)
def call(self, backbone_output: Mapping[str, tf.Tensor], def call(self, inputs: Tuple[Union[tf.Tensor, Mapping[str, tf.Tensor]],
decoder_output: Mapping[str, tf.Tensor]) -> tf.Tensor: Union[tf.Tensor, Mapping[str, tf.Tensor]]]):
"""Forward pass of the segmentation head. """Forward pass of the segmentation head.
Args: It supports both a tuple of 2 tensors or 2 dictionaries. The first is
backbone_output: a dict of tensors backbone endpoints, and the second is decoder endpoints. When inputs are
- key: `str`, the level of the multilevel features. tensors, they are from a single level of feature maps. When inputs are
- values: `Tensor`, the feature map tensors, whose shape is [batch, dictionaries, they contain multiple levels of feature maps, where the key
height_l, width_l, channels]. is the index of feature map.
decoder_output: a dict of tensors
- key: `str`, the level of the multilevel features.
- values: `Tensor`, the feature map tensors, whose shape is [batch,
height_l, width_l, channels].
Args:
inputs: A tuple of 2 feature map tensors of shape
[batch, height_l, width_l, channels] or 2 dictionaries of tensors:
- key: A `str` of the level of the multilevel features.
- values: A `tf.Tensor` of the feature map tensors, whose shape is
[batch, height_l, width_l, channels].
Returns: Returns:
segmentation prediction mask: `Tensor`, the segmentation mask scores segmentation prediction mask: A `tf.Tensor` of the segmentation mask
predicted from input feature. scores predicted from input features.
""" """
x = decoder_output[str(self._config_dict['level'])] decoder_output = inputs[1]
x = decoder_output[str(self._config_dict['level'])] if isinstance(
decoder_output, dict) else decoder_output
for i, conv in enumerate(self._convs): for i, conv in enumerate(self._convs):
x = conv(x) x = conv(x)
......
...@@ -42,7 +42,7 @@ class SegmentationHead3DTest(parameterized.TestCase, tf.test.TestCase): ...@@ -42,7 +42,7 @@ class SegmentationHead3DTest(parameterized.TestCase, tf.test.TestCase):
'1': np.random.rand(2, 128, 128, 128, 16), '1': np.random.rand(2, 128, 128, 128, 16),
'2': np.random.rand(2, 64, 64, 64, 16), '2': np.random.rand(2, 64, 64, 64, 16),
} }
logits = head(backbone_features, decoder_features) logits = head((backbone_features, decoder_features))
if str(level) in decoder_features: if str(level) in decoder_features:
self.assertAllEqual(logits.numpy().shape, [ self.assertAllEqual(logits.numpy().shape, [
......
...@@ -31,7 +31,11 @@ class NNBlocks3DTest(parameterized.TestCase, tf.test.TestCase): ...@@ -31,7 +31,11 @@ class NNBlocks3DTest(parameterized.TestCase, tf.test.TestCase):
shape=(spatial_size, spatial_size, volume_size, filters * 4), shape=(spatial_size, spatial_size, volume_size, filters * 4),
batch_size=1) batch_size=1)
block = nn_blocks_3d.BottleneckBlock3DVolume( block = nn_blocks_3d.BottleneckBlock3DVolume(
filters=filters, strides=strides, use_projection=True) filters=filters,
strides=strides,
use_projection=True,
se_ratio=0.2,
stochastic_depth_drop_rate=0.2)
features = block(inputs) features = block(inputs)
...@@ -46,7 +50,11 @@ class NNBlocks3DTest(parameterized.TestCase, tf.test.TestCase): ...@@ -46,7 +50,11 @@ class NNBlocks3DTest(parameterized.TestCase, tf.test.TestCase):
inputs = tf.keras.Input( inputs = tf.keras.Input(
shape=(spatial_size, spatial_size, volume_size, filters), batch_size=1) shape=(spatial_size, spatial_size, volume_size, filters), batch_size=1)
block = nn_blocks_3d.ResidualBlock3DVolume( block = nn_blocks_3d.ResidualBlock3DVolume(
filters=filters, strides=strides, use_projection=True) filters=filters,
strides=strides,
use_projection=True,
se_ratio=0.2,
stochastic_depth_drop_rate=0.2)
features = block(inputs) features = block(inputs)
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function from __future__ import print_function
import os import os
......
...@@ -31,7 +31,6 @@ the two models by concatenating their last hidden layer. ...@@ -31,7 +31,6 @@ the two models by concatenating their last hidden layer.
""" """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function from __future__ import print_function
import sys import sys
......
...@@ -13,12 +13,31 @@ ...@@ -13,12 +13,31 @@
# limitations under the License. # limitations under the License.
"""Ranking Model configuration definition.""" """Ranking Model configuration definition."""
from typing import Optional, List, Union
import dataclasses import dataclasses
from typing import List, Optional, Union
from official.core import config_definitions as cfg
from official.core import exp_factory from official.core import exp_factory
from official.modeling import hyperparams from official.modeling import hyperparams
from official.modeling.hyperparams import config_definitions as cfg
@dataclasses.dataclass
class CallbacksConfig(hyperparams.Config):
"""Configuration for Callbacks.
Attributes:
enable_checkpoint_and_export: Whether or not to enable checkpoints as a
Callback. Defaults to True.
enable_backup_and_restore: Whether or not to add BackupAndRestore
callback. Defaults to True.
enable_tensorboard: Whether or not to enable TensorBoard as a Callback.
Defaults to True.
enable_time_history: Whether or not to enable TimeHistory Callbacks.
Defaults to True.
"""
enable_checkpoint_and_export: bool = True
enable_backup_and_restore: bool = False
enable_tensorboard: bool = True
enable_time_history: bool = True
@dataclasses.dataclass @dataclasses.dataclass
...@@ -126,7 +145,6 @@ class TrainerConfig(cfg.TrainerConfig): ...@@ -126,7 +145,6 @@ class TrainerConfig(cfg.TrainerConfig):
use_orbit: Whether to use orbit library with custom training loop or use_orbit: Whether to use orbit library with custom training loop or
compile/fit API. compile/fit API.
enable_metrics_in_training: Whether to enable metrics during training. enable_metrics_in_training: Whether to enable metrics during training.
tensorboard: An instance of TensorboardConfig.
time_history: Config of TimeHistory callback. time_history: Config of TimeHistory callback.
optimizer_config: An `OptimizerConfig` instance for embedding optimizer. optimizer_config: An `OptimizerConfig` instance for embedding optimizer.
Defaults to None. Defaults to None.
...@@ -135,10 +153,9 @@ class TrainerConfig(cfg.TrainerConfig): ...@@ -135,10 +153,9 @@ class TrainerConfig(cfg.TrainerConfig):
# Sets validation steps to be -1 to evaluate the entire dataset. # Sets validation steps to be -1 to evaluate the entire dataset.
validation_steps: int = -1 validation_steps: int = -1
validation_interval: int = 70000 validation_interval: int = 70000
callbacks: cfg.CallbacksConfig = cfg.CallbacksConfig() callbacks: CallbacksConfig = CallbacksConfig()
use_orbit: bool = False use_orbit: bool = False
enable_metrics_in_training: bool = True enable_metrics_in_training: bool = True
tensorboard: cfg.TensorboardConfig = cfg.TensorboardConfig()
time_history: TimeHistoryConfig = TimeHistoryConfig(log_steps=5000) time_history: TimeHistoryConfig = TimeHistoryConfig(log_steps=5000)
optimizer_config: OptimizationConfig = OptimizationConfig() optimizer_config: OptimizationConfig = OptimizationConfig()
......
...@@ -143,9 +143,14 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht ...@@ -143,9 +143,14 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht
### Common Settings and Notes ### Common Settings and Notes
* We provide models for video classification with two backbones: * We provide models for video classification with backbones:
[SlowOnly](https://arxiv.org/abs/1812.03982) and 3D-ResNet (R3D) used in * SlowOnly in
[Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800). [SlowFast Networks for Video Recognition](https://arxiv.org/abs/1812.03982).
* ResNet-3D (R3D) in
[Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800).
* ResNet-3D-RS (R3D-RS) in
[Revisiting 3D ResNets for Video Recognition](https://arxiv.org/pdf/2109.01696.pdf).
* Training and evaluation details: * Training and evaluation details:
* All models are trained from scratch with vision modality (RGB) for 200 * All models are trained from scratch with vision modality (RGB) for 200
epochs. epochs.
...@@ -161,6 +166,11 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht ...@@ -161,6 +166,11 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) | | SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) | | SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) | | R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
| R3D-RS-50 | 32 x 2 | 78.2 | 93.7 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_resnet3drs_50_tpu.yaml) |
| R3D-RS-101 | 32 x 2 | 79.5 | 94.2 | - |
| R3D-RS-152 | 32 x 2 | 79.9 | 94.3 | - |
| R3D-RS-200 | 32 x 2 | 80.4 | 94.4 | - |
| R3D-RS-200 | 48 x 2 | 81.0 | - | - |
### Kinetics-600 Action Recognition Baselines ### Kinetics-600 Action Recognition Baselines
...@@ -168,3 +178,5 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht ...@@ -168,3 +178,5 @@ SpineNet-143 | 1280x1280 | 500 | 94.9 | 51.9 | 45.0 | [config](ht
| -------- |:----------------------:|--------:|--------:|---------:| | -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) | | SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) | | R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
| R3D-RS-200 | 32 x 2 | 83.1 | - | - |
| R3D-RS-200 | 48 x 2 | 83.8 | - | - |
...@@ -98,6 +98,13 @@ class RevNet(hyperparams.Config): ...@@ -98,6 +98,13 @@ class RevNet(hyperparams.Config):
model_id: int = 56 model_id: int = 56
@dataclasses.dataclass
class MobileDet(hyperparams.Config):
"""Mobiledet config."""
model_id: str = 'MobileDetCPU'
filter_size_scale: float = 1.0
@dataclasses.dataclass @dataclasses.dataclass
class Backbone(hyperparams.OneOfConfig): class Backbone(hyperparams.OneOfConfig):
"""Configuration for backbones. """Configuration for backbones.
...@@ -111,6 +118,7 @@ class Backbone(hyperparams.OneOfConfig): ...@@ -111,6 +118,7 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: spinenet backbone config. spinenet: spinenet backbone config.
spinenet_mobile: mobile spinenet backbone config. spinenet_mobile: mobile spinenet backbone config.
mobilenet: mobilenet backbone config. mobilenet: mobilenet backbone config.
mobiledet: mobiledet backbone config.
""" """
type: Optional[str] = None type: Optional[str] = None
resnet: ResNet = ResNet() resnet: ResNet = ResNet()
...@@ -120,3 +128,5 @@ class Backbone(hyperparams.OneOfConfig): ...@@ -120,3 +128,5 @@ class Backbone(hyperparams.OneOfConfig):
spinenet: SpineNet = SpineNet() spinenet: SpineNet = SpineNet()
spinenet_mobile: SpineNetMobile = SpineNetMobile() spinenet_mobile: SpineNetMobile = SpineNetMobile()
mobilenet: MobileNet = MobileNet() mobilenet: MobileNet = MobileNet()
mobiledet: MobileDet = MobileDet()
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
# Lint as: python3 # Lint as: python3
"""Decoders configurations.""" """Decoders configurations."""
from typing import Optional, List import dataclasses
from typing import List, Optional
# Import libraries # Import libraries
import dataclasses
from official.modeling import hyperparams from official.modeling import hyperparams
...@@ -53,6 +53,8 @@ class ASPP(hyperparams.Config): ...@@ -53,6 +53,8 @@ class ASPP(hyperparams.Config):
num_filters: int = 256 num_filters: int = 256
use_depthwise_convolution: bool = False use_depthwise_convolution: bool = False
pool_kernel_size: Optional[List[int]] = None # Use global average pooling. pool_kernel_size: Optional[List[int]] = None # Use global average pooling.
spp_layer_version: str = 'v1'
output_tensor: bool = False
@dataclasses.dataclass @dataclasses.dataclass
......
# --experiment_type=retinanet_mobile_coco
# COCO AP 27.0%
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
losses:
l2_weight_decay: 3.0e-05
model:
anchor:
anchor_size: 3
aspect_ratios: [0.5, 1.0, 2.0]
num_scales: 3
backbone:
mobilenet:
model_id: 'MobileDetCPU'
filter_size_scale: 1.0
type: 'mobiledet'
decoder:
type: 'fpn'
fpn:
num_filters: 128
use_separable_conv: true
head:
num_convs: 4
num_filters: 128
use_separable_conv: true
    input_size: [320, 320, 3]
max_level: 6
min_level: 3
norm_activation:
activation: 'relu6'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
dtype: 'bfloat16'
global_batch_size: 256
is_training: true
parser:
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
dtype: 'bfloat16'
global_batch_size: 8
is_training: false
trainer:
optimizer_config:
learning_rate:
stepwise:
boundaries: [263340, 272580]
values: [0.32, 0.032, 0.0032]
type: 'stepwise'
warmup:
linear:
warmup_learning_rate: 0.0067
warmup_steps: 2000
steps_per_loop: 462
train_steps: 277200
validation_interval: 462
validation_steps: 625
...@@ -19,11 +19,10 @@ import os ...@@ -19,11 +19,10 @@ import os
from typing import List, Optional, Union from typing import List, Optional, Union
import numpy as np import numpy as np
from official.core import config_definitions as cfg
from official.core import exp_factory from official.core import exp_factory
from official.modeling import hyperparams from official.modeling import hyperparams
from official.modeling import optimization from official.modeling import optimization
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.configs import common from official.vision.beta.configs import common
from official.vision.beta.configs import decoders from official.vision.beta.configs import decoders
from official.vision.beta.configs import backbones from official.vision.beta.configs import backbones
......
...@@ -19,8 +19,8 @@ ...@@ -19,8 +19,8 @@
from absl.testing import parameterized from absl.testing import parameterized
import tensorflow as tf import tensorflow as tf
from official.core import config_definitions as cfg
from official.core import exp_factory from official.core import exp_factory
from official.modeling.hyperparams import config_definitions as cfg
from official.vision import beta from official.vision import beta
from official.vision.beta.configs import semantic_segmentation as exp_cfg from official.vision.beta.configs import semantic_segmentation as exp_cfg
......
#!/bin/bash
# Downloads the COCO 2017 dataset (images, instance annotations, and panoptic
# annotations) into <data_dir> and converts both splits to TFRecords with
# official/vision/beta/data/create_coco_tf_record.py.
#
# Usage: ./download_and_preprocess_coco.sh <data_dir>
#
# Requires sudo for package installation; needs ~50 GB free in <data_dir>.

# Abort on the first failing step — a partially downloaded or partially
# extracted dataset would silently produce corrupt TFRecords.
set -e

if [[ $# -lt 1 ]]; then
  echo "Usage: $0 <data_dir>" >&2
  exit 1
fi
DATA_DIR="$1"

sudo apt update
sudo apt install unzip aria2 -y

# Fetch all four archives concurrently (-Z: treat each URI as a separate
# download; -j 8: up to 8 parallel downloads).
aria2c -j 8 -Z \
  http://images.cocodataset.org/annotations/annotations_trainval2017.zip \
  http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip \
  http://images.cocodataset.org/zips/train2017.zip \
  http://images.cocodataset.org/zips/val2017.zip \
  --dir="$DATA_DIR"

# The '*' is quoted so unzip (not the shell) expands it, matching every
# downloaded archive in one invocation; then stash the zips out of the way.
unzip "$DATA_DIR"/'*.zip' -d "$DATA_DIR"
mkdir "$DATA_DIR/zips" && mv "$DATA_DIR"/*.zip "$DATA_DIR/zips"

# The panoptic masks ship as nested zips inside the annotations archive.
unzip "$DATA_DIR/annotations/panoptic_train2017.zip" -d "$DATA_DIR"
unzip "$DATA_DIR/annotations/panoptic_val2017.zip" -d "$DATA_DIR"

python3 official/vision/beta/data/create_coco_tf_record.py \
  --logtostderr \
  --image_dir="$DATA_DIR/val2017" \
  --object_annotations_file="$DATA_DIR/annotations/instances_val2017.json" \
  --output_file_prefix="$DATA_DIR/tfrecords/val" \
  --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_val2017.json" \
  --panoptic_masks_dir="$DATA_DIR/panoptic_val2017" \
  --num_shards=8 \
  --include_masks \
  --include_panoptic_masks

python3 official/vision/beta/data/create_coco_tf_record.py \
  --logtostderr \
  --image_dir="$DATA_DIR/train2017" \
  --object_annotations_file="$DATA_DIR/annotations/instances_train2017.json" \
  --output_file_prefix="$DATA_DIR/tfrecords/train" \
  --panoptic_annotations_file="$DATA_DIR/annotations/panoptic_train2017.json" \
  --panoptic_masks_dir="$DATA_DIR/panoptic_train2017" \
  --num_shards=32 \
  --include_masks \
  --include_panoptic_masks
...@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder ...@@ -23,8 +23,9 @@ from official.vision.beta.dataloaders import decoder
def _generate_source_id(image_bytes): def _generate_source_id(image_bytes):
# Hashing using 22 bits since float32 has only 23 mantissa bits.
return tf.strings.as_string( return tf.strings.as_string(
tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 63 - 1)) tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 22 - 1))
class TfExampleDecoder(decoder.Decoder): class TfExampleDecoder(decoder.Decoder):
......
...@@ -14,24 +14,13 @@ ...@@ -14,24 +14,13 @@
"""Tests for tf_example_decoder.py.""" """Tests for tf_example_decoder.py."""
import io
# Import libraries # Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
from PIL import Image
import tensorflow as tf import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.dataloaders import tfexample_utils
DUMP_SOURCE_ID = b'123'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
...@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -52,73 +41,11 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_decoder.TfExampleDecoder( decoder = tf_example_decoder.TfExampleDecoder(
include_mask=True, regenerate_source_id=regenerate_source_id) include_mask=True, regenerate_source_id=regenerate_source_id)
image = _encode_image( serialized_example = tfexample_utils.create_detection_test_example(
np.uint8(np.random.rand(image_height, image_width, 3) * 255), image_height=image_height,
fmt='JPEG') image_width=image_width,
if num_instances == 0: image_channel=3,
xmins = [] num_instances=num_instances).SerializeToString()
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -127,7 +54,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
if not regenerate_source_id: if not regenerate_source_id:
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -151,7 +78,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -172,46 +99,38 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -172,46 +99,38 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/label': (tf.train.Feature(
'image/object/bbox/ymin': ( int64_list=tf.train.Int64List(value=labels))),
tf.train.Feature( 'image/object/is_crowd': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/bbox/ymax': ( 'image/object/area': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=areas))),
float_list=tf.train.FloatList(value=ymaxs))), 'image/object/mask': (tf.train.Feature(
'image/object/class/label': ( bytes_list=tf.train.BytesList(value=masks))),
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -221,7 +140,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -259,7 +178,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -276,40 +195,34 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -276,40 +195,34 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/label': (tf.train.Feature(
'image/object/bbox/ymin': ( int64_list=tf.train.Int64List(value=labels))),
tf.train.Feature( 'image/object/mask': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), bytes_list=tf.train.BytesList(value=masks))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(serialized_example)) tf.convert_to_tensor(serialized_example))
...@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase): ...@@ -318,7 +231,7 @@ class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
......
...@@ -14,28 +14,19 @@ ...@@ -14,28 +14,19 @@
"""Tests for tf_example_label_map_decoder.py.""" """Tests for tf_example_label_map_decoder.py."""
import io
import os import os
# Import libraries # Import libraries
from absl.testing import parameterized from absl.testing import parameterized
import numpy as np import numpy as np
from PIL import Image
import tensorflow as tf import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_label_map_decoder from official.vision.beta.dataloaders import tf_example_label_map_decoder
from official.vision.beta.dataloaders import tfexample_utils
DUMP_SOURCE_ID = b'123'
LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2' LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters( @parameterized.parameters(
...@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -56,74 +47,11 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap( decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
label_map_path, include_mask=True) label_map_path, include_mask=True)
image = _encode_image( serialized_example = tfexample_utils.create_detection_test_example(
np.uint8(np.random.rand(image_height, image_width, 3) * 255), image_height=image_height,
fmt='JPEG') image_width=image_width,
if num_instances == 0: image_channel=3,
xmins = [] num_instances=num_instances).SerializeToString()
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
labels = [b'class_1'] * num_instances
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -131,7 +59,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
...@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -162,7 +90,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]], [[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]] [[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG') image = tfexample_utils.encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4 image_height = 4
image_width = 4 image_width = 4
num_instances = 2 num_instances = 2
...@@ -183,46 +111,38 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -183,46 +111,38 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255], [0, 255, 255, 255],
[0, 255, 255, 255]]] [0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)] masks = [
tfexample_utils.encode_image(np.uint8(m), fmt='PNG')
for m in list(mask_content)
]
serialized_example = tf.train.Example( serialized_example = tf.train.Example(
features=tf.train.Features( features=tf.train.Features(
feature={ feature={
'image/encoded': ( 'image/encoded': (tf.train.Feature(
tf.train.Feature( bytes_list=tf.train.BytesList(value=[image]))),
bytes_list=tf.train.BytesList(value=[image]))), 'image/source_id': (tf.train.Feature(
'image/source_id': ( bytes_list=tf.train.BytesList(
tf.train.Feature( value=[tfexample_utils.DUMP_SOURCE_ID]))),
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), 'image/height': (tf.train.Feature(
'image/height': ( int64_list=tf.train.Int64List(value=[image_height]))),
tf.train.Feature( 'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))), int64_list=tf.train.Int64List(value=[image_width]))),
'image/width': ( 'image/object/bbox/xmin': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=xmins))),
int64_list=tf.train.Int64List(value=[image_width]))), 'image/object/bbox/xmax': (tf.train.Feature(
'image/object/bbox/xmin': ( float_list=tf.train.FloatList(value=xmaxs))),
tf.train.Feature( 'image/object/bbox/ymin': (tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))), float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/xmax': ( 'image/object/bbox/ymax': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=ymaxs))),
float_list=tf.train.FloatList(value=xmaxs))), 'image/object/class/text': (tf.train.Feature(
'image/object/bbox/ymin': ( bytes_list=tf.train.BytesList(value=labels))),
tf.train.Feature( 'image/object/is_crowd': (tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))), int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/bbox/ymax': ( 'image/object/area': (tf.train.Feature(
tf.train.Feature( float_list=tf.train.FloatList(value=areas))),
float_list=tf.train.FloatList(value=ymaxs))), 'image/object/mask': (tf.train.Feature(
'image/object/class/text': ( bytes_list=tf.train.BytesList(value=masks))),
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString() })).SerializeToString()
decoded_tensors = decoder.decode( decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example)) tf.convert_to_tensor(value=serialized_example))
...@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase): ...@@ -232,7 +152,7 @@ class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllEqual( self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape) (image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image']) self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id']) self.assertEqual(tfexample_utils.DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height']) self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width']) self.assertEqual(image_width, results['width'])
self.assertAllEqual( self.assertAllEqual(
......
...@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory ...@@ -23,6 +23,22 @@ from official.vision.beta.dataloaders import tfds_factory
class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def _create_test_example(self):
  """Builds a fake TFDS-style feature dict covering all decoder inputs.

  Returns:
    A dict with the union of keys read by the classification, detection,
    and segmentation decoders; each decoder consumes only the keys it
    needs and ignores the rest.
  """
  example = {}
  example['image'] = tf.ones(shape=(100, 100, 3), dtype=tf.uint8)
  example['label'] = 1
  example['image/id'] = 0
  # Per-object annotations used by the detection decoders.
  example['objects'] = {
      'label': 1,
      'is_crowd': 0,
      'area': 0.5,
      'bbox': [0.1, 0.2, 0.3, 0.4],
  }
  example['segmentation_label'] = tf.ones((100, 100, 1), dtype=tf.uint8)
  example['image_left'] = tf.ones(shape=(100, 100, 3), dtype=tf.uint8)
  return example
@parameterized.parameters( @parameterized.parameters(
('imagenet2012'), ('imagenet2012'),
('cifar10'), ('cifar10'),
...@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -31,6 +47,10 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_classification_decoder(self, tfds_name): def test_classification_decoder(self, tfds_name):
decoder = tfds_factory.get_classification_decoder(tfds_name) decoder = tfds_factory.get_classification_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 2)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/class/label', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('flowers'), ('flowers'),
...@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -48,6 +68,16 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_detection_decoder(self, tfds_name): def test_detection_decoder(self, tfds_name):
decoder = tfds_factory.get_detection_decoder(tfds_name) decoder = tfds_factory.get_detection_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 8)
self.assertIn('image', decoded_tensor)
self.assertIn('source_id', decoded_tensor)
self.assertIn('height', decoded_tensor)
self.assertIn('width', decoded_tensor)
self.assertIn('groundtruth_classes', decoded_tensor)
self.assertIn('groundtruth_is_crowd', decoded_tensor)
self.assertIn('groundtruth_area', decoded_tensor)
self.assertIn('groundtruth_boxes', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('pascal'), ('pascal'),
...@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase): ...@@ -65,6 +95,12 @@ class TFDSFactoryTest(tf.test.TestCase, parameterized.TestCase):
def test_segmentation_decoder(self, tfds_name): def test_segmentation_decoder(self, tfds_name):
decoder = tfds_factory.get_segmentation_decoder(tfds_name) decoder = tfds_factory.get_segmentation_decoder(tfds_name)
self.assertIsInstance(decoder, base_decoder.Decoder) self.assertIsInstance(decoder, base_decoder.Decoder)
decoded_tensor = decoder.decode(self._create_test_example())
self.assertLen(decoded_tensor, 4)
self.assertIn('image/encoded', decoded_tensor)
self.assertIn('image/segmentation/class/encoded', decoded_tensor)
self.assertIn('image/height', decoded_tensor)
self.assertIn('image/width', decoded_tensor)
@parameterized.parameters( @parameterized.parameters(
('coco'), ('coco'),
......
...@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded' ...@@ -54,16 +54,20 @@ IMAGE_KEY = 'image/encoded'
CLASSIFICATION_LABEL_KEY = 'image/class/label' CLASSIFICATION_LABEL_KEY = 'image/class/label'
LABEL_KEY = 'clip/label/index' LABEL_KEY = 'clip/label/index'
AUDIO_KEY = 'features/audio' AUDIO_KEY = 'features/audio'
DUMP_SOURCE_ID = b'123'
def make_image_bytes(shape: Sequence[int]): def encode_image(image_array: np.array, fmt: str) -> bytes:
"""Generates image and return bytes in JPEG format.""" image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
def make_image_bytes(shape: Sequence[int], fmt: str = 'JPEG') -> bytes:
"""Generates image and return bytes in specified format."""
random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8) random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8)
random_image = Image.fromarray(random_image) return encode_image(random_image, fmt=fmt)
with io.BytesIO() as buffer:
random_image.save(buffer, format='JPEG')
raw_image_bytes = buffer.getvalue()
return raw_image_bytes
def put_int64_to_context(seq_example: tf.train.SequenceExample, def put_int64_to_context(seq_example: tf.train.SequenceExample,
...@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int, ...@@ -164,3 +168,102 @@ def create_3d_image_test_example(image_height: int, image_width: int,
bytes_list=tf.train.BytesList(value=[labels.tobytes()]))) bytes_list=tf.train.BytesList(value=[labels.tobytes()])))
} }
return tf.train.Example(features=tf.train.Features(feature=feature)) return tf.train.Example(features=tf.train.Features(feature=feature))
def create_detection_test_example(image_height: int, image_width: int,
                                  image_channel: int,
                                  num_instances: int) -> tf.train.Example:
  """Creates and returns a test example containing box and mask annotations.

  Box coordinates are sampled independently and uniformly in [0, 1), so
  xmin/xmax (and ymin/ymax) are not guaranteed to be ordered and computed
  areas may be negative; this is sufficient for decoder tests.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.
    num_instances: The number of object instances per image. May be 0, in
      which case every per-instance feature is an empty list.

  Returns:
    A tf.train.Example for testing.
  """
  image = make_image_bytes([image_height, image_width, image_channel])
  # When num_instances == 0 each expression below naturally yields an empty
  # list, so no separate zero-instance branch is needed.
  xmins = list(np.random.rand(num_instances))
  xmaxs = list(np.random.rand(num_instances))
  ymins = list(np.random.rand(num_instances))
  ymaxs = list(np.random.rand(num_instances))
  labels_text = [b'class_1'] * num_instances
  labels = list(np.random.randint(100, size=num_instances))
  areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
           for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
  is_crowds = [0] * num_instances
  # One PNG-encoded single-channel mask per instance.
  masks = [
      make_image_bytes([image_height, image_width], fmt='PNG')
      for _ in range(num_instances)
  ]
  return tf.train.Example(
      features=tf.train.Features(
          feature={
              'image/encoded': (tf.train.Feature(
                  bytes_list=tf.train.BytesList(value=[image]))),
              'image/source_id': (tf.train.Feature(
                  bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
              'image/height': (tf.train.Feature(
                  int64_list=tf.train.Int64List(value=[image_height]))),
              'image/width': (tf.train.Feature(
                  int64_list=tf.train.Int64List(value=[image_width]))),
              'image/object/bbox/xmin': (tf.train.Feature(
                  float_list=tf.train.FloatList(value=xmins))),
              'image/object/bbox/xmax': (tf.train.Feature(
                  float_list=tf.train.FloatList(value=xmaxs))),
              'image/object/bbox/ymin': (tf.train.Feature(
                  float_list=tf.train.FloatList(value=ymins))),
              'image/object/bbox/ymax': (tf.train.Feature(
                  float_list=tf.train.FloatList(value=ymaxs))),
              'image/object/class/label': (tf.train.Feature(
                  int64_list=tf.train.Int64List(value=labels))),
              'image/object/class/text': (tf.train.Feature(
                  bytes_list=tf.train.BytesList(value=labels_text))),
              'image/object/is_crowd': (tf.train.Feature(
                  int64_list=tf.train.Int64List(value=is_crowds))),
              'image/object/area': (tf.train.Feature(
                  float_list=tf.train.FloatList(value=areas))),
              'image/object/mask': (tf.train.Feature(
                  bytes_list=tf.train.BytesList(value=masks))),
          }))
def create_segmentation_test_example(image_height: int, image_width: int,
                                     image_channel: int) -> tf.train.Example:
  """Creates and returns a test example containing mask annotations.

  Args:
    image_height: The height of test image.
    image_width: The width of test image.
    image_channel: The channel of test image.

  Returns:
    A tf.train.Example for testing.
  """

  def _bytes_feature(value: bytes) -> tf.train.Feature:
    # Wraps a single bytes value as a tf.train.Feature.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

  def _int64_feature(value: int) -> tf.train.Feature:
    # Wraps a single integer value as a tf.train.Feature.
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

  encoded_image = make_image_bytes([image_height, image_width, image_channel])
  # The segmentation mask is a PNG-encoded single-channel image.
  encoded_mask = make_image_bytes([image_height, image_width], fmt='PNG')
  feature = {
      'image/encoded': _bytes_feature(encoded_image),
      'image/segmentation/class/encoded': _bytes_feature(encoded_mask),
      'image/height': _int64_feature(image_height),
      'image/width': _int64_feature(image_width),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))
...@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor: ...@@ -31,7 +31,7 @@ def process_source_id(source_id: tf.Tensor) -> tf.Tensor:
A formatted source ID. A formatted source ID.
""" """
if source_id.dtype == tf.string: if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int64) source_id = tf.strings.to_number(source_id, tf.int64)
with tf.control_dependencies([source_id]): with tf.control_dependencies([source_id]):
source_id = tf.cond( source_id = tf.cond(
pred=tf.equal(tf.size(input=source_id), 0), pred=tf.equal(tf.size(input=source_id), 0),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment