Commit 0de0482e authored by Jiageng Zhang's avatar Jiageng Zhang Committed by A. Unique TensorFlower
Browse files

Internal change

PiperOrigin-RevId: 474934779
parent fe7bdaf9
...@@ -20,9 +20,12 @@
from official.core import base_trainer
from official.core import config_definitions
from official.core import exp_factory
from official.core import export_base
from official.core import file_writers
from official.core import input_reader
from official.core import registry
from official.core import savedmodel_checkpoint_manager
from official.core import task_factory
from official.core import tf_example_builder
from official.core import tf_example_feature_key
from official.core import train_lib
from official.core import train_utils
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""File writer functions for dataset preparation, infra validation, and unit tests."""
import io
from typing import Optional, Sequence, Union
import riegeli
import tensorflow as tf
def write_small_dataset(examples: Sequence[Union[tf.train.Example,
                                                 tf.train.SequenceExample]],
                        output_path: str,
                        file_type: str = 'tfrecord') -> None:
  """Writes `examples` to a file at `output_path` with type `file_type`.

  CAVEAT: This function is not recommended for writing large datasets, since it
  will loop through `examples` and perform write operation sequentially.

  Args:
    examples: List of tf.train.Example or tf.train.SequenceExample.
    output_path: Output path for the dataset.
    file_type: A string indicating the file format, could be: 'tfrecord',
      'tfrecord_compressed', 'riegeli'. Matching is case-insensitive.

  Raises:
    ValueError: If `file_type` is not one of the supported formats.
  """
  file_type = file_type.lower()
  if file_type == 'tfrecord':
    _write_tfrecord(examples, output_path)
    return
  if file_type == 'tfrecord_compressed':
    gzip_options = tf.io.TFRecordOptions(compression_type='GZIP')
    _write_tfrecord(examples, output_path, gzip_options)
    return
  if file_type == 'riegeli':
    _write_riegeli(examples, output_path)
    return
  raise ValueError(f'Unknown file_type: {file_type}')
def _write_tfrecord(examples: Sequence[Union[tf.train.Example,
                                             tf.train.SequenceExample]],
                    output_path: str,
                    options: Optional[tf.io.TFRecordOptions] = None) -> None:
  """Sequentially writes each example in `examples` to a TFRecord file.

  Args:
    examples: A list of tf.train.Example or tf.train.SequenceExample.
    output_path: Output path for the dataset.
    options: Options used for manipulating TFRecord files (e.g. compression).
  """
  with tf.io.TFRecordWriter(output_path, options) as record_writer:
    for serialized in (example.SerializeToString() for example in examples):
      record_writer.write(serialized)
def _write_riegeli(examples: Sequence[Union[tf.train.Example,
                                            tf.train.SequenceExample]],
                   output_path: str) -> None:
  """Writes all of `examples` to a Riegeli file at `output_path`.

  Args:
    examples: A list of tf.train.Example or tf.train.SequenceExample.
    output_path: Output path for the dataset.
  """
  # Single `with` manages both the raw file and the Riegeli writer on top.
  with io.FileIO(output_path, 'wb') as fileio, \
      riegeli.RecordWriter(fileio) as record_writer:
    record_writer.write_messages(examples)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for file_writers."""
import os
from absl.testing import parameterized
import tensorflow as tf
from official.core import file_writers
from official.core import tf_example_builder
class FileWritersTest(tf.test.TestCase, parameterized.TestCase):
  """Unit tests for `file_writers.write_small_dataset`."""

  def setUp(self):
    super().setUp()
    # Build a minimal one-feature example shared by all test cases.
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_bytes_feature('foo', 'Hello World!')
    self._example = example_builder.example

  # Mixed-case variants check that `file_type` matching is case-insensitive.
  @parameterized.parameters('tfrecord', 'TFRecord', 'tfrecord_compressed',
                            'TFRecord_Compressed', 'riegeli', 'Riegeli')
  def test_write_small_dataset_success(self, file_type):
    temp_dir = self.create_tempdir()
    temp_dataset_file = os.path.join(temp_dir.full_path, 'train')
    file_writers.write_small_dataset([self._example], temp_dataset_file,
                                     file_type)
    # Only checks that a file was produced, not its contents.
    self.assertTrue(os.path.exists(temp_dataset_file))

  def test_write_small_dataset_unrecognized_format(self):
    # Unknown file types must raise instead of silently writing nothing.
    file_type = 'bar'
    temp_dir = self.create_tempdir()
    temp_dataset_file = os.path.join(temp_dir.full_path, 'train')
    with self.assertRaises(ValueError):
      file_writers.write_small_dataset([self._example], temp_dataset_file,
                                       file_type)
# Allows running this test file directly: `python file_writers_test.py`.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Builder class for preparing tf.train.Example."""
# https://www.python.org/dev/peps/pep-0563/#enabling-the-future-behavior-in-python-3-7
from __future__ import annotations
from typing import Mapping, Sequence, Union
import numpy as np
import tensorflow as tf
BytesValueType = Union[bytes, Sequence[bytes], str, Sequence[str]]


# PEP 8 (E731) discourages assigning lambdas to names; named defs also give
# useful tracebacks and docstrings. Behavior is unchanged.
def _to_array(v):
  """Wraps a scalar into a one-element list; lists/ndarrays pass through."""
  return v if isinstance(v, (list, np.ndarray)) else [v]


def _to_bytes(v):
  """UTF-8 encodes `str` values; any other value passes through unchanged."""
  return v.encode() if isinstance(v, str) else v


def _to_bytes_array(v):
  """Normalizes a scalar-or-sequence of str/bytes into a list of bytes."""
  return list(map(_to_bytes, _to_array(v)))
class TfExampleBuilder(object):
  """Builder class for preparing tf.train.Example.

  Read API doc at https://www.tensorflow.org/api_docs/python/tf/train/Example.

  Example usage:
    >>> example_builder = TfExampleBuilder()
    >>> example = (
            example_builder.add_bytes_feature('feature_a', 'foobarbaz')
            .add_ints_feature('feature_b', [1, 2, 3])
            .example)
  """

  def __init__(self) -> None:
    self._example = tf.train.Example()

  @property
  def example(self) -> tf.train.Example:
    """Returns the generated tf.train.Example proto.

    NOTE: This returns the underlying proto instance, not a copy; mutations
    made by subsequent builder calls are visible through this reference.
    """
    return self._example

  @property
  def serialized_example(self) -> bytes:
    """Returns a serialized string of the generated tf.train.Example proto."""
    # Message.SerializeToString() returns `bytes`, hence the `bytes`
    # annotation (the previous `-> str` annotation was incorrect).
    return self._example.SerializeToString()

  def set(self, example: tf.train.Example) -> TfExampleBuilder:
    """Sets the example, replacing any previously built state.

    Args:
      example: A pre-built tf.train.Example to continue building from.

    Returns:
      The builder object for subsequent method calls.
    """
    self._example = example
    return self

  def reset(self) -> TfExampleBuilder:
    """Resets the example to an empty proto.

    Returns:
      The builder object for subsequent method calls.
    """
    self._example = tf.train.Example()
    return self

  ###### Basic APIs for primitive data types ######

  def add_feature_dict(
      self, feature_dict: Mapping[str, tf.train.Feature]) -> TfExampleBuilder:
    """Adds the predefined `feature_dict` to the example.

    Note: Please prefer to using feature-type-specific methods.

    Args:
      feature_dict: A dictionary from tf.Example feature key to
        tf.train.Feature.

    Returns:
      The builder object for subsequent method calls.
    """
    for k, v in feature_dict.items():
      self._example.features.feature[k].CopyFrom(v)
    return self

  def add_feature(self, key: str,
                  feature: tf.train.Feature) -> TfExampleBuilder:
    """Adds predefined `feature` with `key` to the example.

    An existing feature under the same `key` is overwritten (CopyFrom).

    Args:
      key: String key of the feature.
      feature: The feature to be added to the example.

    Returns:
      The builder object for subsequent method calls.
    """
    self._example.features.feature[key].CopyFrom(feature)
    return self

  def add_bytes_feature(self, key: str,
                        value: BytesValueType) -> TfExampleBuilder:
    """Adds byte(s) or string(s) with `key` to the example.

    Strings are UTF-8 encoded; scalars are wrapped into one-element lists.

    Args:
      key: String key of the feature.
      value: The byte(s) or string(s) to be added to the example.

    Returns:
      The builder object for subsequent method calls.
    """
    return self.add_feature(
        key,
        tf.train.Feature(
            bytes_list=tf.train.BytesList(value=_to_bytes_array(value))))

  def add_ints_feature(self, key: str,
                       value: Union[int, Sequence[int]]) -> TfExampleBuilder:
    """Adds integer(s) with `key` to the example.

    Args:
      key: String key of the feature.
      value: The integer(s) to be added to the example.

    Returns:
      The builder object for subsequent method calls.
    """
    return self.add_feature(
        key,
        tf.train.Feature(int64_list=tf.train.Int64List(value=_to_array(value))))

  def add_floats_feature(
      self, key: str, value: Union[float, Sequence[float]]) -> TfExampleBuilder:
    """Adds float(s) with `key` to the example.

    Args:
      key: String key of the feature.
      value: The float(s) to be added to the example.

    Returns:
      The builder object for subsequent method calls.
    """
    return self.add_feature(
        key,
        tf.train.Feature(float_list=tf.train.FloatList(value=_to_array(value))))
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tf_example_builder.
See `test_add_image_matrix_feature_with_fake_image` for the typical structure of
a unit test.
"""
from absl.testing import parameterized
import tensorflow as tf
from official.core import tf_example_builder
class TfExampleBuilderTest(tf.test.TestCase, parameterized.TestCase):
  """Unit tests for `tf_example_builder.TfExampleBuilder`."""

  def test_init_an_empty_example(self):
    example_builder = tf_example_builder.TfExampleBuilder()
    example = example_builder.example
    self.assertProtoEquals('', example)

  def test_init_an_empty_serialized_example(self):
    example_builder = tf_example_builder.TfExampleBuilder()
    example = example_builder.serialized_example
    self.assertProtoEquals('', example)

  def test_add_feature(self):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_feature(
        'foo',
        tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[b'Hello World!'])))
    example = example_builder.example
    # Use proto text to show how the entire proto would look like.
    self.assertProtoEquals(
        """
        features: {
          feature: {
            key: "foo"
            value: {
              bytes_list: {
                value: "Hello World!"
              }
            }
          }
        }""", example)

  def test_add_feature_dict(self):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_feature_dict({
        'foo':
            tf.train.Feature(
                bytes_list=tf.train.BytesList(value=[b'Hello World!'])),
        'bar':
            tf.train.Feature(
                int64_list=tf.train.Int64List(value=[299, 792, 458]))
    })
    example = example_builder.example
    # Use proto text to show how the entire proto would look like.
    self.assertProtoEquals(
        """
        features: {
          feature: {
            key: "foo"
            value: {
              bytes_list: {
                value: "Hello World!"
              }
            }
          }
          feature: {
            key: "bar"
            value: {
              int64_list: {
                value: 299
                value: 792
                value: 458
              }
            }
          }
        }""", example)

  @parameterized.named_parameters(
      ('single_bytes', b'Hello World!', b'Hello World!'),
      ('single_string', 'Hello World!', b'Hello World!'))
  def test_add_single_byte_feature(self, value, expected_value):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_bytes_feature('foo', value)
    example = example_builder.example
    # Use constructor to easily work with test parameters.
    self.assertProtoEquals(
        tf.train.Example(
            features=tf.train.Features(
                feature={
                    'foo':
                        tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=[expected_value]))
                })), example)

  # Fixed test-case name typo: 'multiple_sring' -> 'multiple_string'.
  @parameterized.named_parameters(
      ('multiple_bytes', [b'Hello World!', b'Good Morning!'
                         ], [b'Hello World!', b'Good Morning!']),
      ('multiple_string', ['Hello World!', 'Good Morning!'
                          ], [b'Hello World!', b'Good Morning!']))
  def test_add_multiple_bytes_feature(self, values, expected_values):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_bytes_feature('foo', values)
    example = example_builder.example
    self.assertProtoEquals(
        tf.train.Example(
            features=tf.train.Features(
                feature={
                    'foo':
                        tf.train.Feature(
                            bytes_list=tf.train.BytesList(
                                value=expected_values))
                })), example)

  @parameterized.named_parameters(
      ('single_integer', 123, [123]),
      ('multiple_integers', [123, 456, 789], [123, 456, 789]))
  def test_add_ints_feature(self, value, expected_value):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_ints_feature('bar', value)
    example = example_builder.example
    self.assertProtoEquals(
        tf.train.Example(
            features=tf.train.Features(
                feature={
                    'bar':
                        tf.train.Feature(
                            int64_list=tf.train.Int64List(value=expected_value))
                })), example)

  @parameterized.named_parameters(
      ('single_float', 3.14, [3.14]),
      ('multiple_floats', [3.14, 1.57, 6.28], [3.14, 1.57, 6.28]))
  def test_add_floats_feature(self, value, expected_value):
    example_builder = tf_example_builder.TfExampleBuilder()
    example_builder.add_floats_feature('baz', value)
    example = example_builder.example
    self.assertProtoEquals(
        tf.train.Example(
            features=tf.train.Features(
                feature={
                    'baz':
                        tf.train.Feature(
                            float_list=tf.train.FloatList(value=expected_value))
                })), example)
# Allows running this test file directly: `python tf_example_builder_test.py`.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data classes for tf.Example proto feature keys.
Feature keys are grouped by feature types. Key names follow conventions in
go/tf-example.
"""
import dataclasses
import functools
from typing import Optional
# Disable the generated __init__ so subclasses use the one defined in the
# base class below. `functools.partial` pre-binds `init=False`, making this
# alias usable as a plain decorator. The previous form,
# `functools.partial(dataclasses.dataclass(init=False))`, wrapped an
# already-configured decorator in a no-op partial; this is the conventional
# equivalent.
dataclass = functools.partial(dataclasses.dataclass, init=False)
@dataclass
class TfExampleFeatureKeyBase:
  """Base dataclass for defining tf.Example proto feature keys.

  This class defines the logic of adding prefix to feature keys. Subclasses
  will define feature keys for a specific feature type in data fields.

  NOTE: Please follow subclass examples in this module to define feature keys
  for a new feature type.
  """

  def __init__(self, prefix: Optional[str] = None):
    """Instantiates the feature key class.

    Adds a string prefix to all fields of a feature key instance if `prefix`
    is not None nor empty.

    Example usage:

    >>> test_key = EncodedImageFeatureKey()
    >>> test_key.encoded
    image/encoded
    >>> test_key = EncodedImageFeatureKey('prefix')
    >>> test_key.encoded
    prefix/image/encoded

    Args:
      prefix: A prefix string that will be added before the feature key string
        with a trailing slash '/'.
    """
    if not prefix:
      return  # None or '' leaves every field untouched.
    for field in dataclasses.fields(self):
      setattr(self, field.name, f'{prefix}/{getattr(self, field.name)}')
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for tf_example_feature_key."""
import dataclasses
import inspect
from absl.testing import absltest
from absl.testing import parameterized
from official.core import tf_example_feature_key
# Minimal concrete feature key class used as the fixture for prefix tests.
@tf_example_feature_key.dataclass
class TestFeatureKey(tf_example_feature_key.TfExampleFeatureKeyBase):
  test: str = 'foo/bar'
class TfExampleFeatureKeyTest(parameterized.TestCase):
  """Unit tests for TfExampleFeatureKeyBase prefix handling."""

  def test_add_prefix_success(self):
    test_key = TestFeatureKey('prefix')
    self.assertEqual(test_key.test, 'prefix/foo/bar')

  # Both None and the empty string must leave the key unchanged.
  @parameterized.parameters(None, '')
  def test_add_prefix_skip_success(self, prefix):
    test_key = TestFeatureKey(prefix)
    self.assertEqual(test_key.test, 'foo/bar')

  def test_all_feature_key_classes_are_valid(self):
    # Every class exported by the module must be a dataclass derived from
    # TfExampleFeatureKeyBase, so prefixing works uniformly.
    for _, obj in inspect.getmembers(tf_example_feature_key):
      if inspect.isclass(obj):
        self.assertTrue(dataclasses.is_dataclass(obj))
        self.assertTrue(
            issubclass(obj, tf_example_feature_key.TfExampleFeatureKeyBase))
# Allows running this test file directly:
# `python tf_example_feature_key_test.py`.
if __name__ == '__main__':
  absltest.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generates fake feature for testing and validation."""
import collections
from typing import Optional, Tuple, Union
import numpy as np
_RGB_CHANNELS = 3


def generate_image_np(height: int,
                      width: int,
                      num_channels: int = _RGB_CHANNELS) -> np.ndarray:
  """Returns a deterministic fake uint8 image of shape (height, width, num_channels)."""
  # Sequential pixel values folded into [0, 255) keep the output reproducible.
  pixel_values = np.arange(height * width * num_channels) % 255
  return pixel_values.astype(np.uint8).reshape(height, width, num_channels)
def generate_normalized_boxes_np(num_boxes: int) -> np.ndarray:
  """Returns fake normalized boxes of shape (num_boxes, 4).

  Columns are (ymin, xmin, ymax, xmax); by construction ymin == xmin for
  every box and each box spans 0.5 in both directions.
  """
  mins = np.reshape(np.arange(num_boxes) / (2 * num_boxes), (num_boxes, 1))
  maxs = mins + .5
  return np.concatenate((mins, mins, maxs, maxs), axis=-1)
def generate_boxes_np(height: int, width: int, num_boxes: int) -> np.ndarray:
  """Returns a fake numpy absolute boxes array.

  NOTE(review): columns are documented as (ymin, xmin, ymax, xmax) by
  generate_normalized_boxes_np, yet the even columns (y) are scaled by
  `width` and the odd columns (x) by `height`. This is numerically invisible
  here because the fake data has xmin == ymin, but it looks swapped —
  confirm the intended axis order before reusing with asymmetric sizes.
  """
  # Per-column scale reproduces the original in-place [:, 0::2]/[:, 1::2]
  # multiplications as a single vectorized product.
  scale = np.array([width, height, width, height], dtype=float)
  return generate_normalized_boxes_np(num_boxes) * scale
def generate_classes_np(num_classes: int,
                        size: Optional[int] = None) -> Union[int, np.ndarray]:
  """Returns a fake class id (size is None) or a fake numpy classes array."""
  if size is not None:
    return np.arange(size) % num_classes
  return num_classes - 1
def generate_confidences_np(
    size: Optional[int] = None) -> Union[float, np.ndarray]:
  """Returns a fake confidence score or a fake numpy confidence score array."""
  if size is not None:
    return np.arange(size) / size
  return 0.5
def generate_instance_masks_np(height: int,
                               width: int,
                               boxes_np: np.ndarray,
                               normalized: bool = True) -> np.ndarray:
  """Returns fake instance masks of shape (num_boxes, height, width, 1).

  Each box region is filled with a deterministic 0/1 checker pattern; pixels
  outside the boxes stay zero.

  NOTE: when `normalized` is True, `boxes_np` is denormalized IN PLACE,
  mutating the caller's array — preserved intentionally from the original.
  """
  masks = np.zeros((len(boxes_np), height, width, 1))
  if normalized:
    boxes_np[:, 1::2] *= height
    boxes_np[:, ::2] *= width
  x0 = boxes_np[:, 0].astype(int)
  y0 = boxes_np[:, 1].astype(int)
  box_ws = boxes_np[:, 2].astype(int) - x0
  box_hs = boxes_np[:, 3].astype(int) - y0
  for i in range(len(boxes_np)):
    x, y, w, h = x0[i], y0[i], box_ws[i], box_hs[i]
    pattern = np.mod(np.arange(h * w), 2).astype(np.uint8)
    masks[i, y:y + h, x:x + w, :] = pattern.reshape(h, w, 1)
  return masks
def generate_semantic_mask_np(height: int, width: int,
                              num_classes: int) -> np.ndarray:
  """Returns a fake semantic mask of shape (height, width, 1) in [0, num_classes)."""
  # Same deterministic pixel pattern as generate_image_np(height, width, 1),
  # inlined here and folded into the class range.
  pixels = np.mod(np.arange(height * width), 255).astype(np.uint8)
  return np.reshape(pixels, (height, width, 1)) % num_classes
def generate_panoptic_masks_np(
    semantic_mask: np.ndarray, instance_masks: np.ndarray,
    instance_classes: np.ndarray,
    stuff_classes_offset: int) -> Tuple[np.ndarray, np.ndarray]:
  """Returns fake numpy panoptic category and instance mask arrays.

  Thing instances are painted first (later instances overwrite earlier ones
  on overlapping pixels); pixels left unlabeled fall back to the semantic
  label shifted by `stuff_classes_offset`. In panoptic FPN the labels are
  expected in this order: 0 (background), 1..N (stuffs), N+1..N+M-2 (things),
  where the thing labeling uses 0 for background and 1 for all stuff classes.
  """
  category_mask = np.zeros_like(semantic_mask)
  instance_id_mask = np.zeros_like(semantic_mask)
  ids_per_class = collections.defaultdict(int)
  for mask, cls in zip(instance_masks, instance_classes):
    if cls == 0:
      continue  # class 0 marks an empty instance slot
    ids_per_class[cls] += 1
    foreground = np.nonzero(mask == 1)
    # Instance classes start from index 1, hence the +1 shift.
    category_mask[foreground] = cls + 1
    instance_id_mask[foreground] = ids_per_class[cls]
  background = np.nonzero(category_mask == 0)
  category_mask[background] = (
      semantic_mask[background] + stuff_classes_offset)
  return category_mask, instance_id_mask
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Image-related utilities that are useful to prepare dataset."""
import dataclasses
import imghdr
import io
from typing import Tuple
import numpy as np
from PIL import Image
@dataclasses.dataclass
class ImageFormat:
  """Supported image formats.

  For model development, this library should support the same image formats as
  `tf.io.decode_image`[1].

  [1]: https://www.tensorflow.org/api_docs/python/tf/io/decode_image
  """
  # Canonical (upper-case) format strings, as accepted by PIL Image.save.
  bmp: str = 'BMP'
  png: str = 'PNG'
  jpeg: str = 'JPEG'
def validate_image_format(format_str: str) -> str:
  """Validates `format_str` and returns the canonical (upper-case) format.

  Args:
    format_str: Image format string, in any case.

  Returns:
    Canonical image format string.

  Raises:
    ValueError: If the canonical format is not listed in `ImageFormat`.
  """
  canonical_format = format_str.upper()
  supported_formats = dataclasses.asdict(ImageFormat()).values()
  if canonical_format not in supported_formats:
    raise ValueError(f'Image format is invalid: {format_str}')
  return canonical_format
def encode_image(image_np: np.ndarray, image_format: str) -> bytes:
  """Encodes `image_np` into bytes using `image_format`.

  Args:
    image_np: Numpy image array.
    image_format: A string specifying the format of the generated image.

  Returns:
    Encoded image bytes.
  """
  single_channel = len(image_np.shape) > 2 and image_np.shape[2] == 1
  if single_channel:
    # PIL expects a 2-D array for 8-bit grayscale ('L') images.
    image_pil = Image.fromarray(np.squeeze(image_np), 'L')
  else:
    image_pil = Image.fromarray(image_np)
  buffer = io.BytesIO()
  try:
    image_pil.save(buffer, format=validate_image_format(image_format))
    return buffer.getvalue()
  finally:
    buffer.close()
def decode_image(image_bytes: bytes) -> np.ndarray:
  """Decodes `image_bytes` into a (height, width, channels) numpy array.

  Single-channel images are expanded with a trailing channel axis so the
  result always has rank 3.
  """
  image_pil = Image.open(io.BytesIO(image_bytes))
  image_np = np.array(image_pil)
  if len(image_np.shape) < 3:
    image_np = image_np[..., np.newaxis]
  return image_np
def decode_image_metadata(image_bytes: bytes) -> Tuple[int, int, int, str]:
  """Decodes image metadata from encoded image string.

  Args:
    image_bytes: Encoded image string.

  Returns:
    A tuple of height, width, number of channels, and encoding format.

  Raises:
    ValueError: If the detected format is not listed in `ImageFormat`
      (raised by `validate_image_format`).
  """
  image_np = decode_image(image_bytes)
  # https://pillow.readthedocs.io/en/stable/reference/Image.html#image-attributes
  height, width, num_channels = image_np.shape
  # NOTE(review): `imghdr` is deprecated (PEP 594) and removed in Python
  # 3.13 — this call site needs a replacement before upgrading.
  image_format = imghdr.what(file=None, h=image_bytes)
  return height, width, num_channels, validate_image_format(image_format)
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for image_utils."""
import imghdr
from unittest import mock
from absl.testing import parameterized
import tensorflow as tf
from official.vision.data import fake_feature_generator
from official.vision.data import image_utils
class ImageUtilsTest(parameterized.TestCase, tf.test.TestCase):
  """Round-trip tests for the image encode/decode utilities."""

  @parameterized.named_parameters(
      ('RGB_PNG', 128, 64, 3, 'PNG'), ('RGB_JPEG', 2, 1, 3, 'JPEG'),
      ('GREY_BMP', 32, 32, 1, 'BMP'), ('GREY_PNG', 128, 128, 1, 'png'))
  def test_encode_image_then_decode_image(self, height, width, num_channels,
                                          image_format):
    image_np = fake_feature_generator.generate_image_np(height, width,
                                                        num_channels)
    image_str = image_utils.encode_image(image_np, image_format)
    actual_image_np = image_utils.decode_image(image_str)
    # JPEG encoding does not keep the pixel value.
    if image_format != 'JPEG':
      self.assertAllClose(actual_image_np, image_np)
    self.assertEqual(actual_image_np.shape, image_np.shape)

  @parameterized.named_parameters(
      ('RGB_PNG', 128, 64, 3, 'PNG'), ('RGB_JPEG', 64, 128, 3, 'JPEG'),
      ('GREY_BMP', 32, 32, 1, 'BMP'), ('GREY_PNG', 128, 128, 1, 'png'))
  def test_encode_image_then_decode_image_metadata(self, height, width,
                                                   num_channels, image_format):
    image_np = fake_feature_generator.generate_image_np(height, width,
                                                        num_channels)
    image_str = image_utils.encode_image(image_np, image_format)
    (actual_height, actual_width, actual_num_channels, actual_format) = (
        image_utils.decode_image_metadata(image_str))
    self.assertEqual(actual_height, height)
    self.assertEqual(actual_width, width)
    self.assertEqual(actual_num_channels, num_channels)
    # Canonical format is upper case regardless of the input case.
    self.assertEqual(actual_format, image_format.upper())

  def test_encode_image_raise_error_with_invalid_image_format(self):
    with self.assertRaisesRegex(ValueError, 'Image format is invalid: foo'):
      image_np = fake_feature_generator.generate_image_np(2, 2, 1)
      image_utils.encode_image(image_np, 'foo')

  @mock.patch.object(imghdr, 'what', return_value='foo', autospec=True)
  def test_decode_image_raise_error_with_invalid_image_format(self, _):
    # Force imghdr to report an unsupported format for a valid PNG payload.
    image_np = fake_feature_generator.generate_image_np(1, 1, 3)
    image_str = image_utils.encode_image(image_np, 'PNG')
    with self.assertRaisesRegex(ValueError, 'Image format is invalid: foo'):
      image_utils.decode_image_metadata(image_str)
# Allows running this test file directly: `python image_utils_test.py`.
if __name__ == '__main__':
  tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Builder class for preparing tf.train.Example in vision tasks."""
# https://www.python.org/dev/peps/pep-0563/#enabling-the-future-behavior-in-python-3-7
from __future__ import annotations
import hashlib
from typing import Optional, Sequence, Union
import numpy as np
from official.core import tf_example_builder
from official.vision.data import image_utils
from official.vision.data import tf_example_feature_key
BytesValueType = Union[bytes, Sequence[bytes], str, Sequence[str]]


# PEP 8 (E731) discourages assigning lambdas to names; named defs also give
# useful tracebacks and docstrings.
# NOTE(review): these helpers duplicate official.core.tf_example_builder —
# consider importing them from there instead.
def _to_array(v):
  """Wraps a scalar into a one-element list; lists/ndarrays pass through."""
  return v if isinstance(v, (list, np.ndarray)) else [v]


def _to_bytes(v):
  """UTF-8 encodes `str` values; any other value passes through unchanged."""
  return v.encode() if isinstance(v, str) else v


def _to_bytes_array(v):
  """Normalizes a scalar-or-sequence of str/bytes into a list of bytes."""
  return list(map(_to_bytes, _to_array(v)))
class TfExampleBuilder(tf_example_builder.TfExampleBuilder):
  """Builder class for preparing tf.train.Example in vision task.

  Read API doc at https://www.tensorflow.org/api_docs/python/tf/train/Example.
  """

  def add_image_matrix_feature(
      self,
      image_matrix: np.ndarray,
      image_format: str = 'PNG',
      image_source_id: Optional[bytes] = None,
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Encodes and adds image features to the example.

    See `tf_example_feature_key.EncodedImageFeatureKey` for list of feature
    keys that will be added to the example.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    * For adding RGB image feature with PNG encoding:
    >>> example_builder.add_image_matrix_feature(image_matrix)
    * For adding RGB image feature with a pre-generated source ID:
    >>> example_builder.add_image_matrix_feature(
            image_matrix, image_source_id=image_source_id)
    * For adding single-channel depth image feature with JPEG encoding:
    >>> example_builder.add_image_matrix_feature(
            image_matrix, image_format=ImageFormat.JPEG,
            feature_prefix='depth')

    Args:
      image_matrix: Numpy image matrix with shape (height, width, channels).
      image_source_id: Unique string ID to identify the image. Hashed image
        will be used if the field is not provided.
      image_format: Image format string, defaults to 'PNG'.
      feature_prefix: Feature prefix for image features.

    Returns:
      The builder object for subsequent method calls.
    """
    encoded_image = image_utils.encode_image(image_matrix, image_format)
    # The unpacking requires a rank-3 matrix; a rank-2 (grayscale) matrix
    # would raise ValueError here.
    height, width, num_channels = image_matrix.shape
    return self.add_encoded_image_feature(encoded_image, image_format, height,
                                          width, num_channels, image_source_id,
                                          feature_prefix)

  def add_encoded_image_feature(
      self,
      encoded_image: bytes,
      image_format: Optional[str] = None,
      height: Optional[int] = None,
      width: Optional[int] = None,
      num_channels: Optional[int] = None,
      image_source_id: Optional[bytes] = None,
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Adds encoded image features to the example.

    See `tf_example_feature_key.EncodedImageFeatureKey` for list of feature
    keys that will be added to the example.

    Image format, height, width, and channels are inferred from the encoded
    image bytes if any of them is not provided. Hashed image will be used if
    pre-generated source ID is not provided.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    * For adding RGB image feature:
    >>> example_builder.add_encoded_image_feature(image_bytes)
    * For adding RGB image feature with pre-generated source ID:
    >>> example_builder.add_encoded_image_feature(
            image_bytes, image_source_id=image_source_id)
    * For adding single-channel depth image feature:
    >>> example_builder.add_encoded_image_feature(
            image_bytes, feature_prefix='depth')

    Args:
      encoded_image: Encoded image string.
      image_format: Image format string.
      height: Number of rows.
      width: Number of columns.
      num_channels: Number of channels.
      image_source_id: Unique string ID to identify the image.
      feature_prefix: Feature prefix for image features.

    Returns:
      The builder object for subsequent method calls.
    """
    # Decode the metadata when any of the four attributes is missing (or
    # falsy, e.g. height == 0); otherwise only validate the format string.
    if not all((height, width, num_channels, image_format)):
      (height, width, num_channels, image_format) = (
          image_utils.decode_image_metadata(encoded_image))
    else:
      image_format = image_utils.validate_image_format(image_format)

    feature_key = tf_example_feature_key.EncodedImageFeatureKey(feature_prefix)

    # If source ID is not provided, we use hashed encoded image as the source
    # ID. Note that we only keep 24 bits to be consistent with the Model
    # Garden requirement, which will transform the source ID into float32.
    # NOTE(review): `% ((1 << 24) + 1)` yields values in [0, 2**24] inclusive,
    # one more than a strict 24-bit range — confirm this is intended.
    if not image_source_id:
      hashed_image = int(hashlib.blake2s(encoded_image).hexdigest(), 16)
      image_source_id = _to_bytes(str(hash(hashed_image) % ((1 << 24) + 1)))

    # NOTE(review): height/width are wrapped in lists while num_channels is
    # passed bare; the base builder presumably normalizes scalars — confirm.
    return (
        self.add_bytes_feature(feature_key.encoded, encoded_image)
        .add_bytes_feature(feature_key.format, image_format)
        .add_ints_feature(feature_key.height, [height])
        .add_ints_feature(feature_key.width, [width])
        .add_ints_feature(feature_key.num_channels, num_channels)
        .add_bytes_feature(feature_key.source_id, image_source_id))

  def add_boxes_feature(
      self,
      xmins: Sequence[float],
      xmaxs: Sequence[float],
      ymins: Sequence[float],
      ymaxs: Sequence[float],
      labels: Sequence[int],
      confidences: Optional[Sequence[float]] = None,
      normalized: bool = True,
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Adds box and label features to the example.

    Four features will be generated for xmin, ymin, xmax, and ymax. One
    feature will be generated for label. Different feature keys will be used
    for normalized boxes and pixel-value boxes, depending on the value of
    `normalized`.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    >>> example_builder.add_boxes_feature(xmins, xmaxs, ymins, ymaxs, labels)

    Args:
      xmins: A list of minimum X coordinates.
      xmaxs: A list of maximum X coordinates.
      ymins: A list of minimum Y coordinates.
      ymaxs: A list of maximum Y coordinates.
      labels: The labels of added boxes.
      confidences: The confidences of added boxes.
      normalized: Indicate if the coordinates of boxes are normalized.
      feature_prefix: Feature prefix for added box features.

    Returns:
      The builder object for subsequent method calls.
    """
    if normalized:
      feature_key = tf_example_feature_key.BoxFeatureKey(feature_prefix)
    else:
      feature_key = tf_example_feature_key.BoxPixelFeatureKey(feature_prefix)
    self.add_floats_feature(feature_key.xmin, xmins)
    self.add_floats_feature(feature_key.xmax, xmaxs)
    self.add_floats_feature(feature_key.ymin, ymins)
    self.add_floats_feature(feature_key.ymax, ymaxs)
    self.add_ints_feature(feature_key.label, labels)
    # Confidence is optional; only add the feature when scores were supplied.
    if confidences is not None:
      self.add_floats_feature(feature_key.confidence, confidences)
    return self

  def _compute_mask_areas(
      self, instance_mask_matrices: np.ndarray) -> Sequence[float]:
    """Computes the pixel area of each instance mask.

    Args:
      instance_mask_matrices: Binary masks with shape (num_instances, height,
        width, 1).

    Returns:
      A list with one float area (sum of pixel values) per instance.
    """
    # `np.float` (an alias of the builtin `float`, i.e. float64) was removed
    # in NumPy 1.24; `np.float64` preserves the original behavior.
    return np.sum(
        instance_mask_matrices, axis=(1, 2, 3),
        dtype=np.float64).flatten().tolist()

  def add_instance_mask_matrices_feature(
      self,
      instance_mask_matrices: np.ndarray,
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Encodes and adds instance mask features to the example.

    See `tf_example_feature_key.EncodedInstanceMaskFeatureKey` for list of
    feature keys that will be added to the example. Please note that all
    masks will be encoded as PNG images.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    >>> example_builder.add_instance_mask_matrices_feature(
            instance_mask_matrices)

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      instance_mask_matrices: Numpy instance mask matrices with shape
        (num_instance, height, width, 1) or (num_instance, height, width).
      feature_prefix: Feature prefix for instance mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    # Normalize rank-3 input to rank-4 by adding a trailing channel axis.
    if len(instance_mask_matrices.shape) == 3:
      instance_mask_matrices = instance_mask_matrices[..., np.newaxis]

    mask_areas = self._compute_mask_areas(instance_mask_matrices)
    encoded_instance_masks = list(
        map(lambda x: image_utils.encode_image(x, 'PNG'),
            instance_mask_matrices))

    return self.add_encoded_instance_masks_feature(encoded_instance_masks,
                                                   mask_areas, feature_prefix)

  def add_encoded_instance_masks_feature(
      self,
      encoded_instance_masks: Sequence[bytes],
      mask_areas: Optional[Sequence[float]] = None,
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Adds encoded instance mask features to the example.

    See `tf_example_feature_key.EncodedInstanceMaskFeatureKey` for list of
    feature keys that will be added to the example.

    Image area is inferred from the encoded instance mask bytes if not
    provided.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    >>> example_builder.add_encoded_instance_masks_feature(
            instance_mask_bytes)

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      encoded_instance_masks: A list of encoded instance mask string. Note
        that the encoding is not changed in this function and it always
        assumes the image is in "PNG" format.
      mask_areas: Areas for each instance masks.
      feature_prefix: Feature prefix for instance mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    encoded_instance_masks = _to_bytes_array(encoded_instance_masks)
    # Decoding every mask to compute areas is relatively expensive; callers
    # that already know the areas should pass `mask_areas` explicitly.
    if mask_areas is None:
      instance_mask_matrices = np.array(
          list(map(image_utils.decode_image, encoded_instance_masks)))
      mask_areas = self._compute_mask_areas(instance_mask_matrices)

    feature_key = tf_example_feature_key.EncodedInstanceMaskFeatureKey(
        feature_prefix)
    return (
        self.add_bytes_feature(feature_key.mask, encoded_instance_masks)
        .add_floats_feature(feature_key.area, mask_areas))

  def add_semantic_mask_matrix_feature(
      self,
      mask_matrix: np.ndarray,
      mask_format: str = 'PNG',
      visualization_mask_matrix: Optional[np.ndarray] = None,
      visualization_mask_format: str = 'PNG',
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Encodes and adds semantic mask features to the example.

    See `tf_example_feature_key.EncodedSemanticMaskFeatureKey` for list of
    feature keys that will be added to the example.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    * For adding semantic mask feature:
    >>> example_builder.add_semantic_mask_matrix_feature(
            semantic_mask_matrix)
    * For adding semantic mask feature and visualization mask feature:
    >>> example_builder.add_semantic_mask_matrix_feature(
            semantic_mask_matrix,
            visualization_mask_matrix=visualization_mask_matrix)
    * For adding predicted semantic mask feature with visualization mask:
    >>> example_builder.add_encoded_semantic_mask_feature(
            predicted_mask_matrix,
            visualization_mask_matrix=predicted_visualization_mask_matrix,
            feature_prefix='predicted')

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      mask_matrix: Numpy semantic mask matrix with shape (height, width, 1)
        or (height, width).
      mask_format: Mask format string, defaults to 'PNG'.
      visualization_mask_matrix: Numpy visualization mask matrix for semantic
        mask with shape (height, width, 3).
      visualization_mask_format: Visualization mask format string, defaults
        to 'PNG'.
      feature_prefix: Feature prefix for semantic mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    # Normalize rank-2 input to rank-3 by adding a trailing channel axis.
    if len(mask_matrix.shape) == 2:
      mask_matrix = mask_matrix[..., np.newaxis]
    encoded_mask = image_utils.encode_image(mask_matrix, mask_format)

    encoded_visualization_mask = None
    if visualization_mask_matrix is not None:
      encoded_visualization_mask = image_utils.encode_image(
          visualization_mask_matrix, visualization_mask_format)

    return self.add_encoded_semantic_mask_feature(encoded_mask, mask_format,
                                                  encoded_visualization_mask,
                                                  visualization_mask_format,
                                                  feature_prefix)

  def add_encoded_semantic_mask_feature(
      self, encoded_mask: bytes,
      mask_format: str = 'PNG',
      encoded_visualization_mask: Optional[bytes] = None,
      visualization_mask_format: str = 'PNG',
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Adds encoded semantic mask features to the example.

    See `tf_example_feature_key.EncodedSemanticMaskFeatureKey` for list of
    feature keys that will be added to the example.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    * For adding semantic mask feature:
    >>> example_builder.add_encoded_semantic_mask_feature(semantic_mask_bytes)
    * For adding semantic mask feature and visualization mask feature:
    >>> example_builder.add_encoded_semantic_mask_feature(
            semantic_mask_bytes,
            encoded_visualization_mask=visualization_mask_bytes)
    * For adding predicted semantic mask feature with visualization mask:
    >>> example_builder.add_encoded_semantic_mask_feature(
            predicted_mask_bytes,
            encoded_visualization_mask=predicted_visualization_mask_bytes,
            feature_prefix='predicted')

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      encoded_mask: Encoded semantic mask string.
      mask_format: Semantic mask format string, defaults to 'PNG'.
      encoded_visualization_mask: Encoded visualization mask string.
      visualization_mask_format: Visualization mask format string, defaults
        to 'PNG'.
      feature_prefix: Feature prefix for semantic mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    feature_key = tf_example_feature_key.EncodedSemanticMaskFeatureKey(
        feature_prefix)
    example_builder = (
        self.add_bytes_feature(feature_key.mask, encoded_mask)
        .add_bytes_feature(feature_key.mask_format, mask_format))
    # Visualization mask is optional; only add its features when provided.
    if encoded_visualization_mask is not None:
      example_builder = (
          example_builder.add_bytes_feature(
              feature_key.visualization_mask, encoded_visualization_mask)
          .add_bytes_feature(
              feature_key.visualization_mask_format, visualization_mask_format))
    return example_builder

  def add_panoptic_mask_matrix_feature(
      self,
      panoptic_category_mask_matrix: np.ndarray,
      panoptic_instance_mask_matrix: np.ndarray,
      panoptic_category_mask_format: str = 'PNG',
      panoptic_instance_mask_format: str = 'PNG',
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Encodes and adds panoptic mask features to the example.

    See `tf_example_feature_key.EncodedPanopticMaskFeatureKey` for list of
    feature keys that will be added to the example.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    >>> example_builder.add_panoptic_mask_matrix_feature(
            panoptic_category_mask_matrix, panoptic_instance_mask_matrix)

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      panoptic_category_mask_matrix: Numpy panoptic category mask matrix with
        shape (height, width, 1) or (height, width).
      panoptic_instance_mask_matrix: Numpy panoptic instance mask matrix with
        shape (height, width, 1) or (height, width).
      panoptic_category_mask_format: Panoptic category mask format string,
        defaults to 'PNG'.
      panoptic_instance_mask_format: Panoptic instance mask format string,
        defaults to 'PNG'.
      feature_prefix: Feature prefix for panoptic mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    # Normalize rank-2 inputs to rank-3 by adding a trailing channel axis.
    if len(panoptic_category_mask_matrix.shape) == 2:
      panoptic_category_mask_matrix = (
          panoptic_category_mask_matrix[..., np.newaxis])
    if len(panoptic_instance_mask_matrix.shape) == 2:
      panoptic_instance_mask_matrix = (
          panoptic_instance_mask_matrix[..., np.newaxis])

    encoded_panoptic_category_mask = image_utils.encode_image(
        panoptic_category_mask_matrix, panoptic_category_mask_format)
    encoded_panoptic_instance_mask = image_utils.encode_image(
        panoptic_instance_mask_matrix, panoptic_instance_mask_format)

    return self.add_encoded_panoptic_mask_feature(
        encoded_panoptic_category_mask, encoded_panoptic_instance_mask,
        panoptic_category_mask_format, panoptic_instance_mask_format,
        feature_prefix)

  def add_encoded_panoptic_mask_feature(
      self,
      encoded_panoptic_category_mask: bytes,
      encoded_panoptic_instance_mask: bytes,
      panoptic_category_mask_format: str = 'PNG',
      panoptic_instance_mask_format: str = 'PNG',
      feature_prefix: Optional[str] = None) -> 'TfExampleBuilder':
    """Adds encoded panoptic mask features to the example.

    See `tf_example_feature_key.EncodedPanopticMaskFeatureKey` for list of
    feature keys that will be added to the example.

    Example usages:
    >>> example_builder = TfExampleBuilder()
    >>> example_builder.add_encoded_panoptic_mask_feature(
            encoded_panoptic_category_mask, encoded_panoptic_instance_mask)

    TODO(b/223653024): Provide a way to generate visualization mask from
    feature mask.

    Args:
      encoded_panoptic_category_mask: Encoded panoptic category mask string.
      encoded_panoptic_instance_mask: Encoded panoptic instance mask string.
      panoptic_category_mask_format: Panoptic category mask format string,
        defaults to 'PNG'.
      panoptic_instance_mask_format: Panoptic instance mask format string,
        defaults to 'PNG'.
      feature_prefix: Feature prefix for panoptic mask features.

    Returns:
      The builder object for subsequent method calls.
    """
    feature_key = tf_example_feature_key.EncodedPanopticMaskFeatureKey(
        feature_prefix)
    return (
        self.add_bytes_feature(
            feature_key.category_mask, encoded_panoptic_category_mask)
        .add_bytes_feature(
            feature_key.category_mask_format, panoptic_category_mask_format)
        .add_bytes_feature(
            feature_key.instance_mask, encoded_panoptic_instance_mask)
        .add_bytes_feature(
            feature_key.instance_mask_format, panoptic_instance_mask_format))
This diff is collapsed.
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data classes for tf.Example proto feature keys in vision tasks.
Feature keys are grouped by feature types. Key names follow conventions in
go/tf-example.
"""
import dataclasses
import functools
from official.core import tf_example_feature_key
# Disable the generated __init__ so that the one defined in the base class
# (tf_example_feature_key.TfExampleFeatureKeyBase) is used instead.
# NOTE: `dataclasses.dataclass(init=False)` already returns the configured
# class decorator; the previous `functools.partial(...)` wrapper around it
# was a behavior-free no-op and has been removed.
dataclass = dataclasses.dataclass(init=False)
@dataclass
class EncodedImageFeatureKey(tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for a single encoded image.

  The image matrix is expected to be in the shape of (height, width,
  num_channels).

  Attributes:
    encoded: Encoded image bytes.
    format: Format string of the encoded image, e.g. 'PNG'.
    height: Number of rows.
    width: Number of columns.
    num_channels: Number of channels.
    source_id: Unique string ID to identify the image.
  """
  encoded: str = 'image/encoded'
  format: str = 'image/format'
  height: str = 'image/height'
  width: str = 'image/width'
  num_channels: str = 'image/channels'
  source_id: str = 'image/source_id'
@dataclass
class BoxFeatureKey(tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for normalized boxes representing objects in an image.

  Each box is defined by ((ymin, xmin), (ymax, xmax)).
  The origin point of an image matrix is top left.

  Note: The coordinate values are normalized to [0, 1]; this convention is
  commonly adopted by most model implementations.

  Attributes:
    xmin: The x coordinate (column) of the top-left corner.
    xmax: The x coordinate (column) of the bottom-right corner.
    ymin: The y coordinate (row) of the top-left corner.
    ymax: The y coordinate (row) of the bottom-right corner.
    label: The class id.
    confidence: The confidence score of the box; could be a prior score (for
      training) or a predicted score (for prediction).
  """
  xmin: str = 'image/object/bbox/xmin'
  xmax: str = 'image/object/bbox/xmax'
  ymin: str = 'image/object/bbox/ymin'
  ymax: str = 'image/object/bbox/ymax'
  label: str = 'image/object/class/label'
  confidence: str = 'image/object/bbox/confidence'
@dataclass
class BoxPixelFeatureKey(tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for boxes in pixel values representing objects in an image.

  Each box is defined by ((ymin, xmin), (ymax, xmax)).

  Note: The coordinate values are in the scale of the context image. The
  image size is usually stored in `EncodedImageFeatureKey`.

  Attributes:
    xmin: The x coordinate (column) of the top-left corner.
    xmax: The x coordinate (column) of the bottom-right corner.
    ymin: The y coordinate (row) of the top-left corner.
    ymax: The y coordinate (row) of the bottom-right corner.
    label: The class id.
    confidence: The confidence score of the box; could be a prior score (for
      training) or a predicted score (for prediction).
  """
  xmin: str = 'image/object/bbox/xmin_pixels'
  xmax: str = 'image/object/bbox/xmax_pixels'
  ymin: str = 'image/object/bbox/ymin_pixels'
  ymax: str = 'image/object/bbox/ymax_pixels'
  label: str = 'image/object/class/label'
  confidence: str = 'image/object/bbox/confidence'
@dataclass
class EncodedInstanceMaskFeatureKey(
    tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for a single encoded instance mask.

  The instance mask matrices are expected to be in the shape of
  (num_instances, height, width, 1) or (num_instances, height, width). The
  height and width correspond to the image height and width. For each
  instance mask, the pixel value is either 0, representing the background, or
  1, representing the object.

  TODO(b/223653024): Add keys for visualization mask as well.

  Attributes:
    mask: Encoded instance mask bytes.
    area: Total number of pixels that are marked as objects.
  """
  mask: str = 'image/object/mask'
  area: str = 'image/object/area'
@dataclass
class EncodedSemanticMaskFeatureKey(
    tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for an encoded semantic mask and its associated images.

  The semantic mask matrix is expected to be in the shape of (height, width,
  1) or (height, width). The visualization mask matrix is expected to be in
  the shape of (height, width, 3). The height and width correspond to the
  image height and width. Each pixel in the semantic mask represents a class.

  Attributes:
    mask: Encoded semantic mask bytes.
    mask_format: Format string for the semantic mask, e.g. 'PNG'.
    visualization_mask: Encoded visualization mask bytes.
    visualization_mask_format: Format string for the visualization mask, e.g.
      'PNG'.
  """
  mask: str = 'image/segmentation/class/encoded'
  mask_format: str = 'image/segmentation/class/format'
  visualization_mask: str = 'image/segmentation/class/visualization/encoded'
  visualization_mask_format: str = 'image/segmentation/class/visualization/format'
@dataclass
class EncodedPanopticMaskFeatureKey(
    tf_example_feature_key.TfExampleFeatureKeyBase):
  """Feature keys for encoded panoptic category and instance masks.

  Both panoptic mask matrices are expected to be in the shape of (height,
  width, 1) or (height, width). The height and width correspond to the image
  height and width. For the category mask, each pixel represents a class ID,
  and for the instance mask, each pixel represents an instance ID.

  TODO(b/223653024): Add keys for visualization mask as well.

  Attributes:
    category_mask: Encoded panoptic category mask bytes.
    category_mask_format: Format string for the panoptic category mask, e.g.
      'PNG'.
    instance_mask: Encoded panoptic instance mask bytes.
    instance_mask_format: Format string for the panoptic instance mask, e.g.
      'PNG'.
  """
  category_mask: str = 'image/panoptic/category/encoded'
  category_mask_format: str = 'image/panoptic/category/format'
  instance_mask: str = 'image/panoptic/instance/encoded'
  instance_mask_format: str = 'image/panoptic/instance/format'
...@@ -42,32 +42,32 @@ class FooTrainTest(tf.test.TestCase): ...@@ -42,32 +42,32 @@ class FooTrainTest(tf.test.TestCase):
``` ```
""" """
import io
from typing import Sequence, Union from typing import Sequence, Union
import numpy as np import numpy as np
from PIL import Image
import tensorflow as tf import tensorflow as tf
from official.core import file_writers
from official.vision.data import fake_feature_generator
from official.vision.data import image_utils
from official.vision.data import tf_example_builder
IMAGE_KEY = 'image/encoded' IMAGE_KEY = 'image/encoded'
CLASSIFICATION_LABEL_KEY = 'image/class/label' CLASSIFICATION_LABEL_KEY = 'image/class/label'
DISTILATION_LABEL_KEY = 'image/class/soft_labels' DISTILLATION_LABEL_KEY = 'image/class/soft_labels'
LABEL_KEY = 'clip/label/index' LABEL_KEY = 'clip/label/index'
AUDIO_KEY = 'features/audio' AUDIO_KEY = 'features/audio'
DUMP_SOURCE_ID = b'123' DUMP_SOURCE_ID = b'7435790'
def encode_image(image_array: np.array, fmt: str) -> bytes: def encode_image(image_array: np.ndarray, fmt: str) -> bytes:
image = Image.fromarray(image_array) return image_utils.encode_image(image_array, fmt)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
def make_image_bytes(shape: Sequence[int], fmt: str = 'JPEG') -> bytes: def make_image_bytes(shape: Sequence[int], fmt: str = 'JPEG') -> bytes:
"""Generates image and return bytes in specified format.""" """Generates image and return bytes in specified format."""
random_image = np.random.randint(0, 256, size=shape, dtype=np.uint8) image = fake_feature_generator.generate_image_np(*shape)
return encode_image(random_image, fmt=fmt) return encode_image(image, fmt=fmt)
def put_int64_to_context(seq_example: tf.train.SequenceExample, def put_int64_to_context(seq_example: tf.train.SequenceExample,
...@@ -114,84 +114,122 @@ def make_video_test_example(image_shape: Sequence[int] = (263, 320, 3), ...@@ -114,84 +114,122 @@ def make_video_test_example(image_shape: Sequence[int] = (263, 320, 3),
def dump_to_tfrecord(record_file: str, def dump_to_tfrecord(record_file: str,
tf_examples: Sequence[Union[tf.train.Example, tf_examples: Sequence[Union[tf.train.Example,
tf.train.SequenceExample]]): tf.train.SequenceExample]]):
"""Writes serialized Example to TFRecord file with path.""" """Writes serialized Example to TFRecord file with path.
with tf.io.TFRecordWriter(record_file) as writer:
for tf_example in tf_examples:
writer.write(tf_example.SerializeToString())
Note that the examples are expected to be not serialized.
def _encode_image(image_array: np.ndarray, fmt: str) -> bytes: Args:
"""Util function to encode an image.""" record_file: The name of the output file.
image = Image.fromarray(image_array) tf_examples: A list of examples to be stored.
with io.BytesIO() as output: """
image.save(output, format=fmt) file_writers.write_small_dataset(tf_examples, record_file, 'tfrecord')
return output.getvalue()
def create_classification_example( def create_classification_example(
image_height: int, image_height: int,
image_width: int, image_width: int,
image_format: str = 'JPEG', image_format: str = 'JPEG',
is_multilabel: bool = False) -> tf.train.Example: is_multilabel: bool = False,
"""Creates image and labels for image classification input pipeline.""" output_serialized_example: bool = True) -> tf.train.Example:
image = _encode_image( """Creates image and labels for image classification input pipeline.
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt=image_format) Args:
labels = [0, 1] if is_multilabel else [0] image_height: The height of test image.
serialized_example = tf.train.Example( image_width: The width of test image.
features=tf.train.Features( image_format: The format of test image.
feature={ is_multilabel: A boolean flag represents whether the test image can have
IMAGE_KEY: (tf.train.Feature( multiple labels.
bytes_list=tf.train.BytesList(value=[image]))), output_serialized_example: A boolean flag represents whether to return a
CLASSIFICATION_LABEL_KEY: (tf.train.Feature( serialized example.
int64_list=tf.train.Int64List(value=labels))),
})).SerializeToString() Returns:
return serialized_example A tf.train.Example for testing.
"""
image = fake_feature_generator.generate_image_np(image_height, image_width)
labels = fake_feature_generator.generate_classes_np(2,
int(is_multilabel) +
1).tolist()
builder = tf_example_builder.TfExampleBuilder()
example = builder.add_image_matrix_feature(image,
image_format).add_ints_feature(
CLASSIFICATION_LABEL_KEY,
labels).example
if output_serialized_example:
return example.SerializeToString()
return example
def create_distillation_example( def create_distillation_example(
image_height: int, image_height: int,
image_width: int, image_width: int,
num_labels: int, num_labels: int,
image_format: str = 'JPEG') -> tf.train.Example: image_format: str = 'JPEG',
"""Creates image and labels for image classification with distillation.""" output_serialized_example: bool = True) -> tf.train.Example:
image = _encode_image( """Creates image and labels for image classification with distillation.
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt=image_format) Args:
soft_labels = [0.6] * num_labels image_height: The height of test image.
labels = [0] image_width: The width of test image.
serialized_example = tf.train.Example( num_labels: The number of labels used in test image.
features=tf.train.Features( image_format: The format of test image.
feature={ output_serialized_example: A boolean flag represents whether to return a
IMAGE_KEY: (tf.train.Feature( serialized example.
bytes_list=tf.train.BytesList(value=[image]))),
CLASSIFICATION_LABEL_KEY: (tf.train.Feature( Returns:
int64_list=tf.train.Int64List(value=labels))), A tf.train.Example for testing.
DISTILATION_LABEL_KEY: (tf.train.Feature( """
float_list=tf.train.FloatList(value=soft_labels))), image = fake_feature_generator.generate_image_np(image_height, image_width)
})).SerializeToString() labels = fake_feature_generator.generate_classes_np(2, 1).tolist()
return serialized_example soft_labels = (fake_feature_generator.generate_classes_np(1, num_labels) +
0.6).tolist()
builder = tf_example_builder.TfExampleBuilder()
def create_3d_image_test_example(image_height: int, image_width: int, example = builder.add_image_matrix_feature(image,
image_volume: int, image_format).add_ints_feature(
image_channel: int) -> tf.train.Example: CLASSIFICATION_LABEL_KEY,
"""Creates 3D image and label.""" labels).add_floats_feature(
images = np.random.rand(image_height, image_width, image_volume, DISTILLATION_LABEL_KEY,
image_channel) soft_labels).example
images = images.astype(np.float32) if output_serialized_example:
return example.SerializeToString()
labels = np.random.randint( return example
low=2, size=(image_height, image_width, image_volume, image_channel))
labels = labels.astype(np.float32)
def create_3d_image_test_example(
feature = { image_height: int,
IMAGE_KEY: (tf.train.Feature( image_width: int,
bytes_list=tf.train.BytesList(value=[images.tobytes()]))), image_volume: int,
CLASSIFICATION_LABEL_KEY: (tf.train.Feature( image_channel: int,
bytes_list=tf.train.BytesList(value=[labels.tobytes()]))) output_serialized_example: bool = False) -> tf.train.Example:
} """Creates 3D image and label.
return tf.train.Example(features=tf.train.Features(feature=feature))
Args:
image_height: The height of test 3D image.
image_width: The width of test 3D image.
image_volume: The volume of test 3D image.
image_channel: The channel of test 3D image.
output_serialized_example: A boolean flag represents whether to return a
serialized example.
Returns:
A tf.train.Example for testing.
"""
image = fake_feature_generator.generate_image_np(image_height, image_width,
image_channel)
images = image[:, :, np.newaxis, :]
images = np.tile(images, [1, 1, image_volume, 1]).astype(np.float32)
shape = [image_height, image_width, image_volume, image_channel]
labels = fake_feature_generator.generate_classes_np(
2, np.prod(shape)).reshape(shape).astype(np.float32)
builder = tf_example_builder.TfExampleBuilder()
example = builder.add_bytes_feature(IMAGE_KEY,
images.tobytes()).add_bytes_feature(
CLASSIFICATION_LABEL_KEY,
labels.tobytes()).example
if output_serialized_example:
return example.SerializeToString()
return example
def create_detection_test_example( def create_detection_test_example(
...@@ -199,7 +237,8 @@ def create_detection_test_example( ...@@ -199,7 +237,8 @@ def create_detection_test_example(
image_width: int, image_width: int,
image_channel: int, image_channel: int,
num_instances: int, num_instances: int,
fill_image_size: bool = True) -> tf.train.Example: fill_image_size: bool = True,
output_serialized_example: bool = False) -> tf.train.Example:
"""Creates and returns a test example containing box and mask annotations. """Creates and returns a test example containing box and mask annotations.
Args: Args:
...@@ -208,95 +247,64 @@ def create_detection_test_example( ...@@ -208,95 +247,64 @@ def create_detection_test_example(
image_channel: The channel of test image. image_channel: The channel of test image.
num_instances: The number of object instances per image. num_instances: The number of object instances per image.
fill_image_size: If image height and width will be added to the example. fill_image_size: If image height and width will be added to the example.
output_serialized_example: A boolean flag represents whether to return a
serialized example.
Returns: Returns:
A tf.train.Example for testing. A tf.train.Example for testing.
""" """
image = make_image_bytes([image_height, image_width, image_channel]) image = fake_feature_generator.generate_image_np(image_height, image_width,
if num_instances == 0: image_channel)
xmins = [] boxes = fake_feature_generator.generate_normalized_boxes_np(num_instances)
xmaxs = [] ymins, xmins, ymaxs, xmaxs = boxes.T.tolist()
ymins = [] is_crowds = [0] * num_instances
ymaxs = [] labels = fake_feature_generator.generate_classes_np(
labels = [] 2, size=num_instances).tolist()
areas = [] labels_text = [b'class_1'] * num_instances
is_crowds = [] masks = fake_feature_generator.generate_instance_masks_np(
masks = [] image_height, image_width, boxes)
labels_text = []
else: builder = tf_example_builder.TfExampleBuilder()
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances)) example = builder.add_image_matrix_feature(image).add_boxes_feature(
ymins = list(np.random.rand(num_instances)) xmins, xmaxs, ymins, ymaxs,
ymaxs = list(np.random.rand(num_instances)) labels).add_instance_mask_matrices_feature(masks).add_ints_feature(
labels_text = [b'class_1'] * num_instances 'image/object/is_crowd',
labels = list(np.random.randint(100, size=num_instances)) is_crowds).add_bytes_feature('image/object/class/text',
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width labels_text).example
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)] if not fill_image_size:
is_crowds = [0] * num_instances del example.features.feature['image/height']
masks = [] del example.features.feature['image/width']
for _ in range(num_instances):
mask = make_image_bytes([image_height, image_width], fmt='PNG') if output_serialized_example:
masks.append(mask) return example.SerializeToString()
return example
feature = {
'image/encoded':
(tf.train.Feature(bytes_list=tf.train.BytesList(value=[image]))), def create_segmentation_test_example(
'image/source_id': (tf.train.Feature( image_height: int,
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))), image_width: int,
'image/object/bbox/xmin': image_channel: int,
(tf.train.Feature(float_list=tf.train.FloatList(value=xmins))), output_serialized_example: bool = False) -> tf.train.Example:
'image/object/bbox/xmax':
(tf.train.Feature(float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin':
(tf.train.Feature(float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax':
(tf.train.Feature(float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label':
(tf.train.Feature(int64_list=tf.train.Int64List(value=labels))),
'image/object/class/text':
(tf.train.Feature(bytes_list=tf.train.BytesList(value=labels_text))),
'image/object/is_crowd':
(tf.train.Feature(int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area':
(tf.train.Feature(float_list=tf.train.FloatList(value=areas))),
'image/object/mask':
(tf.train.Feature(bytes_list=tf.train.BytesList(value=masks))),
}
if fill_image_size:
feature.update({
'image/height': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
})
return tf.train.Example(features=tf.train.Features(feature=feature))
def create_segmentation_test_example(image_height: int, image_width: int,
image_channel: int) -> tf.train.Example:
"""Creates and returns a test example containing mask annotations. """Creates and returns a test example containing mask annotations.
Args: Args:
image_height: The height of test image. image_height: The height of test image.
image_width: The width of test image. image_width: The width of test image.
image_channel: The channel of test image. image_channel: The channel of test image.
output_serialized_example: A boolean flag represents whether to return a
serialized example.
Returns: Returns:
A tf.train.Example for testing. A tf.train.Example for testing.
""" """
image = make_image_bytes([image_height, image_width, image_channel]) image = fake_feature_generator.generate_image_np(image_height, image_width,
mask = make_image_bytes([image_height, image_width], fmt='PNG') image_channel)
return tf.train.Example( mask = fake_feature_generator.generate_semantic_mask_np(
features=tf.train.Features( image_height, image_width, 3)
feature={ builder = tf_example_builder.TfExampleBuilder()
'image/encoded': (tf.train.Feature( example = builder.add_image_matrix_feature(
bytes_list=tf.train.BytesList(value=[image]))), image).add_semantic_mask_matrix_feature(mask).example
'image/segmentation/class/encoded': (tf.train.Feature( if output_serialized_example:
bytes_list=tf.train.BytesList(value=[mask]))), return example.SerializeToString()
'image/height': (tf.train.Feature( return example
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width])))
}))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment