Commit cc748b2a authored by Abdullah Rashwan's avatar Abdullah Rashwan Committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 329754787
parent 2f788e1d
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The generic parser interface."""
import abc
class Parser(metaclass=abc.ABCMeta):
  """Parses data and produces tensors to be consumed by models."""
@abc.abstractmethod
def _parse_train_data(self, decoded_tensors):
"""Generates images and labels that are usable for model training.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
pass
@abc.abstractmethod
def _parse_eval_data(self, decoded_tensors):
"""Generates images and labels that are usable for model evaluation.
Args:
decoded_tensors: a dict of Tensors produced by the decoder.
Returns:
images: the image tensor.
labels: a dict of Tensors that contains labels.
"""
pass
def parse_fn(self, is_training):
"""Returns a parse fn that reads and parses raw tensors from the decoder.
Args:
is_training: a `bool` to indicate whether it is in training mode.
Returns:
parse: a `callable` that takes the decoded tensors and generates the
  (images, labels) tuple, where labels is a dict of Tensors that
  contains labels.
"""
def parse(decoded_tensors):
"""Parses the serialized example data."""
if is_training:
return self._parse_train_data(decoded_tensors)
else:
return self._parse_eval_data(decoded_tensors)
return parse
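# A minimal usage sketch of the interface above. The `decoded_tensors` keys
# ('image', 'label') are hypothetical and depend on whichever decoder feeds
# the parser; a concrete subclass only has to implement the two hooks.
class _ExamplePassthroughParser(Parser):
  """Toy parser that forwards the image and wraps the label in a dict."""

  def _parse_train_data(self, decoded_tensors):
    return decoded_tensors['image'], {'label': decoded_tensors['label']}

  def _parse_eval_data(self, decoded_tensors):
    # This toy example uses identical logic for training and evaluation.
    return self._parse_train_data(decoded_tensors)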
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data parser and processing for RetinaNet.
Parse image and ground truths in a dataset to training targets and package them
into (image, labels) tuple for RetinaNet.
"""
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import parser
from official.vision.beta.dataloaders import utils
from official.vision.beta.ops import anchor
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import preprocess_ops
class Parser(parser.Parser):
"""Parser to parse an image and its annotations into a dictionary of tensors."""
def __init__(self,
output_size,
min_level,
max_level,
num_scales,
aspect_ratios,
anchor_size,
match_threshold=0.5,
unmatched_threshold=0.5,
aug_rand_hflip=False,
aug_scale_min=1.0,
aug_scale_max=1.0,
use_autoaugment=False,
autoaugment_policy_name='v0',
skip_crowd_during_training=True,
max_num_instances=100,
dtype='bfloat16',
mode=None):
"""Initializes parameters for parsing annotations in the dataset.
Args:
output_size: `Tensor` or `list` for [height, width] of output image. The
  output_size should be divisible by the largest feature stride
  2^max_level.
min_level: `int` number of minimum level of the output feature pyramid.
max_level: `int` number of maximum level of the output feature pyramid.
num_scales: `int` number representing intermediate scales added on each
  level. For instance, num_scales=2 adds one additional intermediate
  anchor scale [2^0, 2^0.5] on each level.
aspect_ratios: `list` of float numbers representing the aspect ratio
  anchors added on each level. The number indicates the ratio of width to
  height. For instance, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
  on each scale level.
anchor_size: `float` number representing the scale of the base anchor
  size relative to the feature stride 2^level.
match_threshold: `float` number between 0 and 1 representing the
lower-bound threshold to assign positive labels for anchors. An anchor
with a score over the threshold is labeled positive.
unmatched_threshold: `float` number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
aug_rand_hflip: `bool`, if True, augment training with random horizontal
flip.
aug_scale_min: `float`, the minimum scale applied to `output_size` for
data augmentation during training.
aug_scale_max: `float`, the maximum scale applied to `output_size` for
data augmentation during training.
use_autoaugment: `bool`, if True, use the AutoAugment augmentation policy
during training.
autoaugment_policy_name: `string` that specifies the name of the
AutoAugment policy that will be used during training.
skip_crowd_during_training: `bool`, if True, skip annotations with
  `is_crowd` equal to 1 during training.
max_num_instances: `int`, the maximum number of instances in an image.
  The groundtruth data will be padded to `max_num_instances`.
dtype: `str`, data type. One of {`bfloat16`, `float32`, `float16`}.
mode: a ModeKeys. Specifies if this is training, evaluation, prediction or
prediction with groundtruths in the outputs.
"""
self._mode = mode
self._max_num_instances = max_num_instances
self._skip_crowd_during_training = skip_crowd_during_training
# Anchor.
self._output_size = output_size
self._min_level = min_level
self._max_level = max_level
self._num_scales = num_scales
self._aspect_ratios = aspect_ratios
self._anchor_size = anchor_size
self._match_threshold = match_threshold
self._unmatched_threshold = unmatched_threshold
# Data augmentation.
self._aug_rand_hflip = aug_rand_hflip
self._aug_scale_min = aug_scale_min
self._aug_scale_max = aug_scale_max
# Data Augmentation with AutoAugment.
self._use_autoaugment = use_autoaugment
self._autoaugment_policy_name = autoaugment_policy_name
# Device.
    self._use_bfloat16 = (dtype == 'bfloat16')
def _parse_train_data(self, data):
"""Parses data for training and evaluation."""
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
is_crowds = data['groundtruth_is_crowd']
# Skips annotations with `is_crowd` = True.
if self._skip_crowd_during_training:
      num_groundtruths = tf.shape(input=classes)[0]
      with tf.control_dependencies([num_groundtruths, is_crowds]):
        indices = tf.cond(
            pred=tf.greater(tf.size(input=is_crowds), 0),
            true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            false_fn=lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
classes = tf.gather(classes, indices)
boxes = tf.gather(boxes, indices)
# Gets original image and its size.
image = data['image']
image_shape = tf.shape(input=image)[0:2]
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image)
# Flips image randomly during training.
if self._aug_rand_hflip:
image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
self._output_size,
padded_size=preprocess_ops.compute_padded_size(self._output_size,
2**self._max_level),
aug_scale_min=self._aug_scale_min,
aug_scale_max=self._aug_scale_max)
image_height, image_width, _ = image.get_shape().as_list()
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
# Assigns anchors.
input_anchor = anchor.build_anchor_generator(
min_level=self._min_level,
max_level=self._max_level,
num_scales=self._num_scales,
aspect_ratios=self._aspect_ratios,
anchor_size=self._anchor_size)
anchor_boxes = input_anchor(image_size=(image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
self._unmatched_threshold)
(cls_targets, box_targets, cls_weights,
box_weights) = anchor_labeler.label_anchors(
anchor_boxes, boxes,
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
image = tf.cast(image, dtype=tf.bfloat16)
# Packs labels for model_fn outputs.
labels = {
'cls_targets': cls_targets,
'box_targets': box_targets,
'anchor_boxes': anchor_boxes,
'cls_weights': cls_weights,
'box_weights': box_weights,
'image_info': image_info,
}
return image, labels
def _parse_eval_data(self, data):
"""Parses data for training and evaluation."""
groundtruths = {}
classes = data['groundtruth_classes']
boxes = data['groundtruth_boxes']
# Gets original image and its size.
image = data['image']
image_shape = tf.shape(input=image)[0:2]
# Normalizes image with mean and std pixel values.
image = preprocess_ops.normalize_image(image)
# Converts boxes from normalized coordinates to pixel coordinates.
boxes = box_ops.denormalize_boxes(boxes, image_shape)
# Resizes and crops image.
image, image_info = preprocess_ops.resize_and_crop_image(
image,
self._output_size,
padded_size=preprocess_ops.compute_padded_size(self._output_size,
2**self._max_level),
aug_scale_min=1.0,
aug_scale_max=1.0)
image_height, image_width, _ = image.get_shape().as_list()
# Resizes and crops boxes.
image_scale = image_info[2, :]
offset = image_info[3, :]
boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
image_info[1, :], offset)
# Filters out ground truth boxes that are all zeros.
indices = box_ops.get_non_empty_box_indices(boxes)
boxes = tf.gather(boxes, indices)
classes = tf.gather(classes, indices)
# Assigns anchors.
input_anchor = anchor.build_anchor_generator(
min_level=self._min_level,
max_level=self._max_level,
num_scales=self._num_scales,
aspect_ratios=self._aspect_ratios,
anchor_size=self._anchor_size)
anchor_boxes = input_anchor(image_size=(image_height, image_width))
anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
self._unmatched_threshold)
(cls_targets, box_targets, cls_weights,
box_weights) = anchor_labeler.label_anchors(
anchor_boxes, boxes,
tf.cast(tf.expand_dims(classes, axis=1), tf.float32))
# If bfloat16 is used, casts input image to tf.bfloat16.
if self._use_bfloat16:
image = tf.cast(image, dtype=tf.bfloat16)
# Sets up groundtruth data for evaluation.
groundtruths = {
'source_id': data['source_id'],
'height': data['height'],
'width': data['width'],
'num_detections': tf.shape(data['groundtruth_classes']),
'image_info': image_info,
'boxes': box_ops.denormalize_boxes(
data['groundtruth_boxes'], image_shape),
'classes': data['groundtruth_classes'],
'areas': data['groundtruth_area'],
'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
}
groundtruths['source_id'] = utils.process_source_id(
groundtruths['source_id'])
groundtruths = utils.pad_groundtruths_to_fixed_size(
groundtruths, self._max_num_instances)
# Packs labels for model_fn outputs.
labels = {
'cls_targets': cls_targets,
'box_targets': box_targets,
'anchor_boxes': anchor_boxes,
'cls_weights': cls_weights,
'box_weights': box_weights,
'image_info': image_info,
'groundtruths': groundtruths,
}
return image, labels
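# A minimal wiring sketch, assuming TFRecord files of COCO-style tf.Examples
# and the sibling `tf_example_decoder` module; `file_pattern` and all
# hyperparameter values below are placeholders.
def _build_retinanet_dataset_sketch(file_pattern, is_training, batch_size):
  """Chains decoder -> parser into a tf.data pipeline."""
  from official.vision.beta.dataloaders import tf_example_decoder

  example_decoder = tf_example_decoder.TfExampleDecoder()
  retinanet_parser = Parser(
      output_size=[640, 640], min_level=3, max_level=7, num_scales=3,
      aspect_ratios=[0.5, 1.0, 2.0], anchor_size=4.0)
  dataset = tf.data.Dataset.list_files(file_pattern)
  dataset = dataset.interleave(tf.data.TFRecordDataset)
  dataset = dataset.map(example_decoder.decode)
  # parse_fn dispatches to _parse_train_data or _parse_eval_data.
  dataset = dataset.map(retinanet_parser.parse_fn(is_training))
  return dataset.batch(batch_size, drop_remainder=is_training)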
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for retinanet_parser.py."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.core import input_reader
from official.modeling.hyperparams import config_definitions as cfg
from official.vision.beta.dataloaders import retinanet_input
from official.vision.beta.dataloaders import tf_example_decoder
class RetinaNetInputTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(
([512, 640], True, True, True),
([640, 640], False, False, False),
)
def testRetinanetInputReader(self,
output_size,
skip_crowd_during_training,
use_autoaugment,
is_training):
batch_size = 2
min_level = 3
max_level = 7
num_scales = 3
aspect_ratios = [0.5, 1.0, 2.0]
anchor_size = 3
max_num_instances = 100
params = cfg.DataConfig(
input_path='/placer/prod/home/snaggletooth/test/data/coco/val*',
global_batch_size=batch_size,
is_training=is_training)
decoder = tf_example_decoder.TfExampleDecoder()
parser = retinanet_input.Parser(
output_size=output_size,
min_level=min_level,
max_level=max_level,
num_scales=num_scales,
aspect_ratios=aspect_ratios,
anchor_size=anchor_size,
skip_crowd_during_training=skip_crowd_during_training,
use_autoaugment=use_autoaugment,
max_num_instances=max_num_instances,
dtype='bfloat16')
reader = input_reader.InputReader(
params,
dataset_fn=tf.data.TFRecordDataset,
decoder_fn=decoder.decode,
parser_fn=parser.parse_fn(params.is_training))
dataset = reader.read()
iterator = iter(dataset)
image, labels = next(iterator)
np_image = image.numpy()
np_labels = tf.nest.map_structure(lambda x: x.numpy(), labels)
# Checks image shape.
self.assertEqual(list(np_image.shape),
[batch_size, output_size[0], output_size[1], 3])
# Checks keys in labels.
if is_training:
self.assertCountEqual(
np_labels.keys(),
['cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
'box_weights', 'image_info'])
else:
self.assertCountEqual(
np_labels.keys(),
['cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
'box_weights', 'groundtruths', 'image_info'])
# Checks shapes of `image_info` and `anchor_boxes`.
self.assertEqual(np_labels['image_info'].shape, (batch_size, 4, 2))
n_anchors = 0
for level in range(min_level, max_level + 1):
stride = 2 ** level
      output_size_l = [output_size[0] // stride, output_size[1] // stride]
anchors_per_location = num_scales * len(aspect_ratios)
self.assertEqual(
list(np_labels['anchor_boxes'][level].shape),
[batch_size, output_size_l[0], output_size_l[1],
4 * anchors_per_location])
n_anchors += output_size_l[0] * output_size_l[1] * anchors_per_location
# Checks shapes of training objectives.
self.assertEqual(np_labels['cls_weights'].shape, (batch_size, n_anchors))
for level in range(min_level, max_level + 1):
stride = 2 ** level
      output_size_l = [output_size[0] // stride, output_size[1] // stride]
anchors_per_location = num_scales * len(aspect_ratios)
self.assertEqual(
list(np_labels['cls_targets'][level].shape),
[batch_size, output_size_l[0], output_size_l[1],
anchors_per_location])
self.assertEqual(
list(np_labels['box_targets'][level].shape),
[batch_size, output_size_l[0], output_size_l[1],
4 * anchors_per_location])
# Checks shape of groundtruths for eval.
if not is_training:
self.assertEqual(np_labels['groundtruths']['source_id'].shape,
(batch_size,))
self.assertEqual(np_labels['groundtruths']['classes'].shape,
(batch_size, max_num_instances))
self.assertEqual(np_labels['groundtruths']['boxes'].shape,
(batch_size, max_num_instances, 4))
self.assertEqual(np_labels['groundtruths']['areas'].shape,
(batch_size, max_num_instances))
self.assertEqual(np_labels['groundtruths']['is_crowds'].shape,
(batch_size, max_num_instances))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import csv
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
def _generate_source_id(image_bytes):
return tf.strings.as_string(
tf.strings.to_hash_bucket_fast(image_bytes, 2 ** 63 - 1))
class TfExampleDecoder(decoder.Decoder):
"""Tensorflow Example proto decoder."""
def __init__(self,
include_mask=False,
regenerate_source_id=False):
self._include_mask = include_mask
self._regenerate_source_id = regenerate_source_id
self._keys_to_features = {
'image/encoded': tf.io.FixedLenFeature((), tf.string),
'image/source_id': tf.io.FixedLenFeature((), tf.string),
'image/height': tf.io.FixedLenFeature((), tf.int64),
'image/width': tf.io.FixedLenFeature((), tf.int64),
'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
'image/object/class/label': tf.io.VarLenFeature(tf.int64),
'image/object/area': tf.io.VarLenFeature(tf.float32),
'image/object/is_crowd': tf.io.VarLenFeature(tf.int64),
}
if include_mask:
self._keys_to_features.update({
'image/object/mask': tf.io.VarLenFeature(tf.string),
})
def _decode_image(self, parsed_tensors):
"""Decodes the image and set its static shape."""
image = tf.io.decode_image(parsed_tensors['image/encoded'], channels=3)
image.set_shape([None, None, 3])
return image
def _decode_boxes(self, parsed_tensors):
"""Concat box coordinates in the format of [ymin, xmin, ymax, xmax]."""
xmin = parsed_tensors['image/object/bbox/xmin']
xmax = parsed_tensors['image/object/bbox/xmax']
ymin = parsed_tensors['image/object/bbox/ymin']
ymax = parsed_tensors['image/object/bbox/ymax']
return tf.stack([ymin, xmin, ymax, xmax], axis=-1)
def _decode_classes(self, parsed_tensors):
return parsed_tensors['image/object/class/label']
def _decode_areas(self, parsed_tensors):
xmin = parsed_tensors['image/object/bbox/xmin']
xmax = parsed_tensors['image/object/bbox/xmax']
ymin = parsed_tensors['image/object/bbox/ymin']
ymax = parsed_tensors['image/object/bbox/ymax']
height = tf.cast(parsed_tensors['image/height'], dtype=tf.float32)
width = tf.cast(parsed_tensors['image/width'], dtype=tf.float32)
return tf.cond(
tf.greater(tf.shape(parsed_tensors['image/object/area'])[0], 0),
lambda: parsed_tensors['image/object/area'],
lambda: (xmax - xmin) * (ymax - ymin) * height * width)
def _decode_masks(self, parsed_tensors):
"""Decode a set of PNG masks to the tf.float32 tensors."""
def _decode_png_mask(png_bytes):
mask = tf.squeeze(
tf.io.decode_png(png_bytes, channels=1, dtype=tf.uint8), axis=-1)
mask = tf.cast(mask, dtype=tf.float32)
mask.set_shape([None, None])
return mask
height = parsed_tensors['image/height']
width = parsed_tensors['image/width']
masks = parsed_tensors['image/object/mask']
return tf.cond(
pred=tf.greater(tf.size(input=masks), 0),
true_fn=lambda: tf.map_fn(_decode_png_mask, masks, dtype=tf.float32),
false_fn=lambda: tf.zeros([0, height, width], dtype=tf.float32))
def decode(self, serialized_example):
"""Decode the serialized example.
Args:
serialized_example: a single serialized tf.Example string.
Returns:
decoded_tensors: a dictionary of tensors with the following fields:
- source_id: a string scalar tensor.
- image: a uint8 tensor of shape [None, None, 3].
- height: an integer scalar tensor.
- width: an integer scalar tensor.
- groundtruth_classes: an int64 tensor of shape [None].
- groundtruth_is_crowd: a bool tensor of shape [None].
- groundtruth_area: a float32 tensor of shape [None].
- groundtruth_boxes: a float32 tensor of shape [None, 4].
- groundtruth_instance_masks: a float32 tensor of shape
[None, None, None].
- groundtruth_instance_masks_png: a string tensor of shape [None].
"""
parsed_tensors = tf.io.parse_single_example(
serialized=serialized_example, features=self._keys_to_features)
for k in parsed_tensors:
if isinstance(parsed_tensors[k], tf.SparseTensor):
if parsed_tensors[k].dtype == tf.string:
parsed_tensors[k] = tf.sparse.to_dense(
parsed_tensors[k], default_value='')
else:
parsed_tensors[k] = tf.sparse.to_dense(
parsed_tensors[k], default_value=0)
if self._regenerate_source_id:
source_id = _generate_source_id(parsed_tensors['image/encoded'])
else:
source_id = tf.cond(
tf.greater(tf.strings.length(parsed_tensors['image/source_id']), 0),
lambda: parsed_tensors['image/source_id'],
lambda: _generate_source_id(parsed_tensors['image/encoded']))
image = self._decode_image(parsed_tensors)
boxes = self._decode_boxes(parsed_tensors)
classes = self._decode_classes(parsed_tensors)
areas = self._decode_areas(parsed_tensors)
is_crowds = tf.cond(
tf.greater(tf.shape(parsed_tensors['image/object/is_crowd'])[0], 0),
lambda: tf.cast(parsed_tensors['image/object/is_crowd'], dtype=tf.bool),
lambda: tf.zeros_like(classes, dtype=tf.bool))
if self._include_mask:
masks = self._decode_masks(parsed_tensors)
decoded_tensors = {
'source_id': source_id,
'image': image,
'height': parsed_tensors['image/height'],
'width': parsed_tensors['image/width'],
'groundtruth_classes': classes,
'groundtruth_is_crowd': is_crowds,
'groundtruth_area': areas,
'groundtruth_boxes': boxes,
}
if self._include_mask:
decoded_tensors.update({
'groundtruth_instance_masks': masks,
'groundtruth_instance_masks_png': parsed_tensors['image/object/mask'],
})
return decoded_tensors
class TfExampleDecoderLabelMap(TfExampleDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self, label_map, include_mask=False, regenerate_source_id=False):
super(TfExampleDecoderLabelMap, self).__init__(
include_mask=include_mask, regenerate_source_id=regenerate_source_id)
self._keys_to_features.update({
'image/object/class/text': tf.io.VarLenFeature(tf.string),
})
name_to_id = self._process_label_map(label_map)
self._name_to_id_table = tf.lookup.StaticHashTable(
tf.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(name_to_id.keys()), dtype=tf.string),
values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
default_value=-1)
def _process_label_map(self, label_map):
if label_map.endswith('.csv'):
name_to_id = self._process_csv(label_map)
else:
      raise ValueError('The label map file must be a .csv file.')
return name_to_id
def _process_csv(self, label_map):
name_to_id = {}
with tf.io.gfile.GFile(label_map, 'r') as f:
reader = csv.reader(f, delimiter=',')
for row in reader:
if len(row) != 2:
raise ValueError('Each row of the csv label map file must be in '
'`id,name` format. length = {}'.format(len(row)))
id_index = int(row[0])
name = row[1]
name_to_id[name] = id_index
return name_to_id
def _decode_classes(self, parsed_tensors):
return self._name_to_id_table.lookup(
parsed_tensors['image/object/class/text'])
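# A minimal round-trip sketch: it builds one synthetic tf.Example in memory,
# so the only assumption is the feature schema declared in `TfExampleDecoder`.
def _decode_sketch():
  """Decodes a tiny synthetic tf.Example with no box annotations."""
  image_bytes = tf.io.encode_jpeg(tf.zeros([4, 4, 3], tf.uint8)).numpy()
  example = tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[image_bytes])),
      'image/source_id': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'42'])),
      'image/height': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[4])),
      'image/width': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[4])),
  }))
  decoded = TfExampleDecoder().decode(tf.constant(example.SerializeToString()))
  # Box/class features were omitted, so the groundtruth tensors come out empty.
  return decoded['image'], decoded['groundtruth_boxes']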
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_example_decoder.py."""
import io
# Import libraries
from absl.testing import parameterized
import numpy as np
from PIL import Image
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder
DUMP_SOURCE_ID = b'123'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(
(100, 100, 0, True),
(100, 100, 1, True),
(100, 100, 2, True),
(100, 100, 0, False),
(100, 100, 1, False),
(100, 100, 2, False),
)
def test_result_shape(self,
image_height,
image_width,
num_instances,
regenerate_source_id):
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=True, regenerate_source_id=regenerate_source_id)
image = _encode_image(
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt='JPEG')
if num_instances == 0:
xmins = []
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
labels = list(np.random.randint(100, size=num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
if not regenerate_source_id:
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
(num_instances,), results['groundtruth_classes'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_is_crowd'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_area'].shape)
self.assertAllEqual(
(num_instances, 4), results['groundtruth_boxes'].shape)
self.assertAllEqual(
(num_instances, image_height, image_width),
results['groundtruth_instance_masks'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_instance_masks_png'].shape)
def test_result_content(self):
decoder = tf_example_decoder.TfExampleDecoder(include_mask=True)
image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
xmins = [0, 0.25]
xmaxs = [0.5, 1.0]
ymins = [0, 0]
ymaxs = [0.5, 1.0]
labels = [3, 1]
areas = [
0.25 * image_height * image_width, 0.75 * image_height * image_width
]
is_crowds = [1, 0]
mask_content = [[[255, 255, 0, 0],
[255, 255, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
(num_instances,), results['groundtruth_classes'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_is_crowd'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_area'].shape)
self.assertAllEqual(
(num_instances, 4), results['groundtruth_boxes'].shape)
self.assertAllEqual(
(num_instances, image_height, image_width),
results['groundtruth_instance_masks'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_instance_masks_png'].shape)
self.assertAllEqual(
[3, 1], results['groundtruth_classes'])
self.assertAllEqual(
[True, False], results['groundtruth_is_crowd'])
self.assertNDArrayNear(
[0.25 * image_height * image_width, 0.75 * image_height * image_width],
results['groundtruth_area'], 1e-4)
self.assertNDArrayNear(
[[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
results['groundtruth_boxes'], 1e-4)
self.assertNDArrayNear(
mask_content, results['groundtruth_instance_masks'], 1e-4)
self.assertAllEqual(
masks, results['groundtruth_instance_masks_png'])
def test_handling_missing_fields(self):
decoder = tf_example_decoder.TfExampleDecoder(include_mask=True)
image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
xmins = [0, 0.25]
xmaxs = [0.5, 1.0]
ymins = [0, 0]
ymaxs = [0.5, 1.0]
labels = [3, 1]
mask_content = [[[255, 255, 0, 0],
[255, 255, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/label': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=labels))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
(num_instances,), results['groundtruth_classes'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_is_crowd'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_area'].shape)
self.assertAllEqual(
(num_instances, 4), results['groundtruth_boxes'].shape)
self.assertAllEqual(
(num_instances, image_height, image_width),
results['groundtruth_instance_masks'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_instance_masks_png'].shape)
self.assertAllEqual(
[3, 1], results['groundtruth_classes'])
self.assertAllEqual(
[False, False], results['groundtruth_is_crowd'])
self.assertNDArrayNear(
[0.25 * image_height * image_width, 0.75 * image_height * image_width],
results['groundtruth_area'], 1e-4)
self.assertNDArrayNear(
[[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
results['groundtruth_boxes'], 1e-4)
self.assertNDArrayNear(
mask_content, results['groundtruth_instance_masks'], 1e-4)
self.assertAllEqual(
masks, results['groundtruth_instance_masks_png'])
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import csv
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder
class TfExampleDecoderLabelMap(tf_example_decoder.TfExampleDecoder):
"""Tensorflow Example proto decoder."""
def __init__(self, label_map, include_mask=False, regenerate_source_id=False):
super(TfExampleDecoderLabelMap, self).__init__(
include_mask=include_mask, regenerate_source_id=regenerate_source_id)
self._keys_to_features.update({
'image/object/class/text': tf.io.VarLenFeature(tf.string),
})
name_to_id = self._process_label_map(label_map)
self._name_to_id_table = tf.lookup.StaticHashTable(
tf.lookup.KeyValueTensorInitializer(
keys=tf.constant(list(name_to_id.keys()), dtype=tf.string),
values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
default_value=-1)
def _process_label_map(self, label_map):
if label_map.endswith('.csv'):
name_to_id = self._process_csv(label_map)
else:
      raise ValueError('The label map file must be a .csv file.')
return name_to_id
def _process_csv(self, label_map):
name_to_id = {}
with tf.io.gfile.GFile(label_map, 'r') as f:
reader = csv.reader(f, delimiter=',')
for row in reader:
if len(row) != 2:
raise ValueError('Each row of the csv label map file must be in '
'`id,name` format. length = {}'.format(len(row)))
id_index = int(row[0])
name = row[1]
name_to_id[name] = id_index
return name_to_id
def _decode_classes(self, parsed_tensors):
return self._name_to_id_table.lookup(
parsed_tensors['image/object/class/text'])
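# A minimal usage sketch; the CSV content and the temp path are illustrative.
# Each row must be `id,name`, as enforced by `_process_csv` above.
def _label_map_decoder_sketch(tmp_dir):
  """Writes a tiny `id,name` CSV and builds a label-map decoder from it."""
  import os
  label_map_path = os.path.join(tmp_dir, 'label_map.csv')
  with tf.io.gfile.GFile(label_map_path, 'w') as f:
    f.write('0,cat\n1,dog')
  # Class text is looked up through the hash table; unknown names map to -1.
  return TfExampleDecoderLabelMap(label_map_path)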
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_example_label_map_decoder.py."""
import io
import os
# Import libraries
from absl.testing import parameterized
import numpy as np
from PIL import Image
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_label_map_decoder
DUMP_SOURCE_ID = b'123'
LABEL_MAP_CSV_CONTENT = '0,class_0\n1,class_1\n2,class_2'
def _encode_image(image_array, fmt):
image = Image.fromarray(image_array)
with io.BytesIO() as output:
image.save(output, format=fmt)
return output.getvalue()
class TfExampleDecoderLabelMapTest(tf.test.TestCase, parameterized.TestCase):
  @parameterized.parameters(
      (100, 100, 0),
      (100, 100, 1),
      (100, 100, 2),
  )
def test_result_shape(self, image_height, image_width, num_instances):
label_map_dir = self.get_temp_dir()
label_map_name = 'label_map.csv'
label_map_path = os.path.join(label_map_dir, label_map_name)
with open(label_map_path, 'w') as f:
f.write(LABEL_MAP_CSV_CONTENT)
decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
label_map_path, include_mask=True)
image = _encode_image(
np.uint8(np.random.rand(image_height, image_width, 3) * 255),
fmt='JPEG')
if num_instances == 0:
xmins = []
xmaxs = []
ymins = []
ymaxs = []
labels = []
areas = []
is_crowds = []
masks = []
else:
xmins = list(np.random.rand(num_instances))
xmaxs = list(np.random.rand(num_instances))
ymins = list(np.random.rand(num_instances))
ymaxs = list(np.random.rand(num_instances))
areas = [(xmax - xmin) * (ymax - ymin) * image_height * image_width
for xmin, xmax, ymin, ymax in zip(xmins, xmaxs, ymins, ymaxs)]
is_crowds = [0] * num_instances
masks = []
labels = [b'class_1'] * num_instances
for _ in range(num_instances):
mask = _encode_image(
np.uint8(np.random.rand(image_height, image_width) * 255),
fmt='PNG')
masks.append(mask)
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
(num_instances,), results['groundtruth_classes'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_is_crowd'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_area'].shape)
self.assertAllEqual(
(num_instances, 4), results['groundtruth_boxes'].shape)
self.assertAllEqual(
(num_instances, image_height, image_width),
results['groundtruth_instance_masks'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_instance_masks_png'].shape)
def test_result_content(self):
label_map_dir = self.get_temp_dir()
label_map_name = 'label_map.csv'
label_map_path = os.path.join(label_map_dir, label_map_name)
with open(label_map_path, 'w') as f:
f.write(LABEL_MAP_CSV_CONTENT)
decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
label_map_path, include_mask=True)
image_content = [[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [255, 255, 255], [255, 255, 255], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]]
image = _encode_image(np.uint8(image_content), fmt='PNG')
image_height = 4
image_width = 4
num_instances = 2
xmins = [0, 0.25]
xmaxs = [0.5, 1.0]
ymins = [0, 0]
ymaxs = [0.5, 1.0]
labels = [b'class_2', b'class_0']
areas = [
0.25 * image_height * image_width, 0.75 * image_height * image_width
]
is_crowds = [1, 0]
mask_content = [[[255, 255, 0, 0],
[255, 255, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255],
[0, 255, 255, 255]]]
masks = [_encode_image(np.uint8(m), fmt='PNG') for m in list(mask_content)]
serialized_example = tf.train.Example(
features=tf.train.Features(
feature={
'image/encoded': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[image]))),
'image/source_id': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=[DUMP_SOURCE_ID]))),
'image/height': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_height]))),
'image/width': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=[image_width]))),
'image/object/bbox/xmin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmins))),
'image/object/bbox/xmax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=xmaxs))),
'image/object/bbox/ymin': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymins))),
'image/object/bbox/ymax': (
tf.train.Feature(
float_list=tf.train.FloatList(value=ymaxs))),
'image/object/class/text': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=labels))),
'image/object/is_crowd': (
tf.train.Feature(
int64_list=tf.train.Int64List(value=is_crowds))),
'image/object/area': (
tf.train.Feature(
float_list=tf.train.FloatList(value=areas))),
'image/object/mask': (
tf.train.Feature(
bytes_list=tf.train.BytesList(value=masks))),
})).SerializeToString()
decoded_tensors = decoder.decode(
tf.convert_to_tensor(value=serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertAllEqual(
(image_height, image_width, 3), results['image'].shape)
self.assertAllEqual(image_content, results['image'])
self.assertEqual(DUMP_SOURCE_ID, results['source_id'])
self.assertEqual(image_height, results['height'])
self.assertEqual(image_width, results['width'])
self.assertAllEqual(
(num_instances,), results['groundtruth_classes'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_is_crowd'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_area'].shape)
self.assertAllEqual(
(num_instances, 4), results['groundtruth_boxes'].shape)
self.assertAllEqual(
(num_instances, image_height, image_width),
results['groundtruth_instance_masks'].shape)
self.assertAllEqual(
(num_instances,), results['groundtruth_instance_masks_png'].shape)
self.assertAllEqual(
[2, 0], results['groundtruth_classes'])
self.assertAllEqual(
[True, False], results['groundtruth_is_crowd'])
self.assertNDArrayNear(
[0.25 * image_height * image_width, 0.75 * image_height * image_width],
results['groundtruth_area'], 1e-4)
self.assertNDArrayNear(
[[0, 0, 0.5, 0.5], [0, 0.25, 1.0, 1.0]],
results['groundtruth_boxes'], 1e-4)
self.assertNDArrayNear(
mask_content, results['groundtruth_instance_masks'], 1e-4)
self.assertAllEqual(
masks, results['groundtruth_instance_masks_png'])
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader utils."""
# Import libraries
import tensorflow as tf
from official.vision.beta.ops import preprocess_ops
def process_source_id(source_id):
"""Processes source_id to the right format."""
if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int32)
with tf.control_dependencies([source_id]):
source_id = tf.cond(
pred=tf.equal(tf.size(input=source_id), 0),
true_fn=lambda: tf.cast(tf.constant(-1), tf.int32),
false_fn=lambda: tf.identity(source_id))
return source_id
def pad_groundtruths_to_fixed_size(groundtruths, size):
"""Pads the first dimension of groundtruths labels to the fixed size."""
groundtruths['boxes'] = preprocess_ops.clip_or_pad_to_fixed_size(
groundtruths['boxes'], size, -1)
groundtruths['is_crowds'] = preprocess_ops.clip_or_pad_to_fixed_size(
groundtruths['is_crowds'], size, 0)
groundtruths['areas'] = preprocess_ops.clip_or_pad_to_fixed_size(
groundtruths['areas'], size, -1)
groundtruths['classes'] = preprocess_ops.clip_or_pad_to_fixed_size(
groundtruths['classes'], size, -1)
return groundtruths
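# A quick behavior sketch: tensors are clipped or padded along the first
# dimension; -1 is the pad value except for `is_crowds`, which pads with 0.
def _padding_sketch():
  groundtruths = {
      'boxes': tf.zeros([2, 4]),
      'is_crowds': tf.zeros([2], tf.int32),
      'areas': tf.ones([2]),
      'classes': tf.ones([2], tf.int32),
  }
  padded = pad_groundtruths_to_fixed_size(groundtruths, size=5)
  return padded['boxes'].shape  # (5, 4) regardless of the input count.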
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Parser for video and label datasets."""
from typing import Dict, Optional, Tuple
from absl import logging
import tensorflow as tf
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops_3d
IMAGE_KEY = 'image/encoded'
LABEL_KEY = 'clip/label/index'
def _process_image(image: tf.Tensor,
is_training: bool = True,
num_frames: int = 32,
stride: int = 1,
num_test_clips: int = 1,
min_resize: int = 224,
crop_size: int = 200,
zero_centering_image: bool = False,
seed: Optional[int] = None) -> tf.Tensor:
"""Processes a serialized image tensor.
Args:
image: Input Tensor of shape [timesteps] and type tf.string of serialized
frames.
is_training: Whether or not in training mode. If True, random sampling,
  cropping and left-right flipping are used.
num_frames: Number of frames per subclip.
stride: Temporal stride to sample frames.
num_test_clips: Number of test clips (1 by default). If more than 1, this
will sample multiple linearly spaced clips within each video at test time.
If 1, then a single clip in the middle of the video is sampled. The clips
are aggregated in the batch dimension.
min_resize: Frames are resized so that min(height, width) is min_resize.
crop_size: Final size of the frame after cropping the resized frames. Both
height and width are the same.
zero_centering_image: If True, frames are normalized to values in [-1, 1].
If False, values in [0, 1].
seed: A deterministic seed to use when sampling.
Returns:
Processed frames. Tensor of shape
[num_frames * num_test_clips, crop_size, crop_size, 3].
"""
# Validate parameters.
if is_training and num_test_clips != 1:
logging.warning(
'`num_test_clips` %d is ignored since `is_training` is `True`.',
num_test_clips)
# Temporal sampler.
if is_training:
# Sample random clip.
image = preprocess_ops_3d.sample_sequence(image, num_frames, True, stride,
seed)
elif num_test_clips > 1:
# Sample linspace clips.
image = preprocess_ops_3d.sample_linspace_sequence(image, num_test_clips,
num_frames, stride)
else:
# Sample middle clip.
image = preprocess_ops_3d.sample_sequence(image, num_frames, False, stride)
# Decode JPEG string to tf.uint8.
image = preprocess_ops_3d.decode_jpeg(image, 3)
# Resize images (resize happens only if necessary to save compute).
image = preprocess_ops_3d.resize_smallest(image, min_resize)
if is_training:
# Standard image data augmentation: random crop and random flip.
image = preprocess_ops_3d.crop_image(image, crop_size, crop_size, True,
seed)
image = preprocess_ops_3d.random_flip_left_right(image, seed)
else:
# Central crop of the frames.
image = preprocess_ops_3d.crop_image(image, crop_size, crop_size, False)
# Cast the frames in float32, normalizing according to zero_centering_image.
return preprocess_ops_3d.normalize_image(image, zero_centering_image)
def _postprocess_image(image: tf.Tensor,
is_training: bool = True,
num_frames: int = 32,
num_test_clips: int = 1) -> tf.Tensor:
"""Processes a batched Tensor of frames.
The same parameters used in process should be used here.
Args:
image: Input Tensor of shape [batch, timesteps, height, width, 3].
is_training: Whether or not in training mode. If True, random sampling,
  cropping and left-right flipping are used.
num_frames: Number of frames per subclip.
num_test_clips: Number of test clips (1 by default). If more than 1, this
will sample multiple linearly spaced clips within each video at test time.
If 1, then a single clip in the middle of the video is sampled. The clips
are aggregated in the batch dimension.
Returns:
Processed frames. Tensor of shape
[batch * num_test_clips, num_frames, height, width, 3].
"""
if num_test_clips > 1 and not is_training:
    # In this case, multiple clips are merged together in the batch dimension,
    # which will be batch * num_test_clips.
image = tf.reshape(
image, (-1, num_frames, image.shape[2], image.shape[3], image.shape[4]))
return image
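# A shape sketch for the reshape above: with num_test_clips=2 and
# num_frames=8, an eval batch of [2, 16, 8, 8, 3] frames becomes
# [4, 8, 8, 8, 3], i.e. the clips move into the batch dimension.
def _postprocess_image_sketch():
  frames = tf.zeros([2, 16, 8, 8, 3])
  return _postprocess_image(frames, is_training=False, num_frames=8,
                            num_test_clips=2).shape  # (4, 8, 8, 8, 3)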
def _process_label(label: tf.Tensor,
one_hot_label: bool = True,
num_classes: Optional[int] = None) -> tf.Tensor:
"""Processes label Tensor."""
# Validate parameters.
if one_hot_label and not num_classes:
raise ValueError(
'`num_classes` should be given when requesting one hot label.')
# Cast to tf.int32.
label = tf.cast(label, dtype=tf.int32)
if one_hot_label:
# Replace label index by one hot representation.
label = tf.one_hot(label, num_classes)
return label
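# A one-liner sketch of the label transform above.
def _process_label_sketch():
  # tf.one_hot maps index 2 with num_classes=4 to [0., 0., 1., 0.].
  return _process_label(tf.constant(2), one_hot_label=True, num_classes=4)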
class Decoder(decoder.Decoder):
"""A tf.Example decoder for classification task."""
  def __init__(self, image_key: str = IMAGE_KEY, label_key: str = LABEL_KEY):
    self._image_key = image_key
    self._label_key = label_key
self._context_description = {
# One integer stored in context.
self._label_key: tf.io.FixedLenFeature((), tf.int64),
}
self._sequence_description = {
# Each image is a string encoding JPEG.
self._image_key: tf.io.FixedLenSequenceFeature((), tf.string),
}
def decode(self, serialized_example):
"""Parses a single tf.Example into image and label tensors."""
context, sequences = tf.io.parse_single_sequence_example(
serialized_example, self._context_description,
self._sequence_description)
return {
self._image_key: sequences[self._image_key],
self._label_key: context[self._label_key]
}
class Parser(parser.Parser):
"""Parses a video and label dataset."""
def __init__(self,
input_params: exp_cfg.DataConfig,
image_key: str = IMAGE_KEY,
label_key: str = LABEL_KEY):
self._num_frames = input_params.feature_shape[0]
self._stride = input_params.temporal_stride
self._num_test_clips = input_params.num_test_clips
self._min_resize = input_params.min_image_size
self._crop_size = input_params.feature_shape[1]
self._one_hot_label = input_params.one_hot
self._num_classes = input_params.num_classes
self._image_key = image_key
self._label_key = label_key
def _parse_train_data(
self, decoded_tensors: Dict[str, tf.Tensor]
) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
"""Parses data for training."""
# Process image and label.
image = decoded_tensors[self._image_key]
label = decoded_tensors[self._label_key]
image = _process_image(
image=image,
is_training=True,
num_frames=self._num_frames,
stride=self._stride,
num_test_clips=self._num_test_clips,
min_resize=self._min_resize,
crop_size=self._crop_size)
label = _process_label(label, self._one_hot_label, self._num_classes)
return {'image': image}, label
def _parse_eval_data(
self, decoded_tensors: Dict[str, tf.Tensor]
) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
"""Parses data for evaluation."""
image = decoded_tensors[self._image_key]
label = decoded_tensors[self._label_key]
image = _process_image(
image=image,
is_training=False,
num_frames=self._num_frames,
stride=self._stride,
num_test_clips=self._num_test_clips,
min_resize=self._min_resize,
crop_size=self._crop_size)
label = _process_label(label, self._one_hot_label, self._num_classes)
return {'image': image}, label
class PostBatchProcessor(object):
"""Processes a video and label dataset which is batched."""
def __init__(self, input_params: exp_cfg.DataConfig):
self._is_training = input_params.is_training
self._num_frames = input_params.feature_shape[0]
self._num_test_clips = input_params.num_test_clips
def __call__(
self,
image: Dict[str, tf.Tensor],
label: tf.Tensor) -> Tuple[Dict[str, tf.Tensor], tf.Tensor]:
"""Parses a single tf.Example into image and label tensors."""
image = image['image']
image = _postprocess_image(
image=image,
is_training=self._is_training,
num_frames=self._num_frames,
num_test_clips=self._num_test_clips)
return {'image': image}, label
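# An end-to-end wiring sketch, assuming a `DataConfig` such as
# video_classification.kinetics600 and a dataset of serialized
# tf.SequenceExample strings; both arguments are placeholders.
def _video_pipeline_sketch(params: exp_cfg.DataConfig,
                           dataset: tf.data.Dataset) -> tf.data.Dataset:
  """Chains Decoder -> Parser -> batch -> PostBatchProcessor."""
  video_decoder = Decoder()
  video_parser = Parser(params)
  postprocess = PostBatchProcessor(params)
  dataset = dataset.map(video_decoder.decode)
  dataset = dataset.map(video_parser.parse_fn(params.is_training))
  dataset = dataset.batch(params.global_batch_size)
  # The post-batch step folds test clips into the batch dimension.
  return dataset.map(postprocess)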
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for video_input.py."""
import io
# Import libraries
import numpy as np
from PIL import Image
import tensorflow as tf
from official.vision.beta.configs import video_classification as exp_cfg
from official.vision.beta.dataloaders import video_input
class DecoderTest(tf.test.TestCase):
"""A tf.SequenceExample decoder for the video classification task."""
def test_decoder(self):
decoder = video_input.Decoder()
# Create fake data.
random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
random_image = Image.fromarray(random_image)
label = 42
with io.BytesIO() as buffer:
random_image.save(buffer, format='JPEG')
raw_image_bytes = buffer.getvalue()
seq_example = tf.train.SequenceExample()
seq_example.feature_lists.feature_list.get_or_create(
video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
raw_image_bytes
]
seq_example.feature_lists.feature_list.get_or_create(
video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
raw_image_bytes
]
seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [
label
]
serialized_example = seq_example.SerializeToString()
decoded_tensors = decoder.decode(tf.convert_to_tensor(serialized_example))
results = tf.nest.map_structure(lambda x: x.numpy(), decoded_tensors)
self.assertCountEqual([video_input.IMAGE_KEY, video_input.LABEL_KEY],
results.keys())
self.assertEqual(label, results[video_input.LABEL_KEY])
class VideoAndLabelParserTest(tf.test.TestCase):
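  """Tests for the video classification Parser."""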
def test_video_input(self):
params = exp_cfg.kinetics600(is_training=True)
params.feature_shape = (2, 224, 224, 3)
params.min_image_size = 224
decoder = video_input.Decoder()
parser = video_input.Parser(params).parse_fn(params.is_training)
# Create fake data.
random_image = np.random.randint(0, 256, size=(263, 320, 3), dtype=np.uint8)
random_image = Image.fromarray(random_image)
with io.BytesIO() as buffer:
random_image.save(buffer, format='JPEG')
raw_image_bytes = buffer.getvalue()
seq_example = tf.train.SequenceExample()
seq_example.feature_lists.feature_list.get_or_create(
video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
raw_image_bytes
]
seq_example.feature_lists.feature_list.get_or_create(
video_input.IMAGE_KEY).feature.add().bytes_list.value[:] = [
raw_image_bytes
]
seq_example.context.feature[video_input.LABEL_KEY].int64_list.value[:] = [
42
]
input_tensor = tf.constant(seq_example.SerializeToString())
decoded_tensors = decoder.decode(input_tensor)
output_tensor = parser(decoded_tensors)
image_features, label = output_tensor
image = image_features['image']
self.assertAllEqual(image.shape, (2, 224, 224, 3))
self.assertAllEqual(label.shape, (600,))
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The COCO-style evaluator.
The following snippet demonstrates the use of interfaces:
evaluator = COCOEvaluator(...)
for _ in range(num_evals):
for _ in range(num_batches_per_eval):
predictions, groundtruths = predictor.predict(...)  # pop a batch.
evaluator.update_state(groundtruths, predictions)
evaluator.result() # finish one full eval and reset states.
See also: https://github.com/cocodataset/cocoapi/
"""
import atexit
import tempfile
# Import libraries
from absl import logging
import numpy as np
from pycocotools import cocoeval
import six
import tensorflow as tf
from official.vision.beta.evaluation import coco_utils
class COCOEvaluator(object):
"""COCO evaluation metric class."""
def __init__(self, annotation_file, include_mask, need_rescale_bboxes=True):
"""Constructs COCO evaluation class.
The class provides the interface to the COCO metrics. `update_state()`
takes detections and groundtruths from each batch and aggregates them
internally. `evaluate()` loads a JSON file in COCO annotation format as the
groundtruths (or builds one from the aggregated groundtruths) and runs COCO
evaluation.
Args:
annotation_file: a JSON file that stores annotations of the eval dataset.
If `annotation_file` is None, groundtruth annotations will be loaded
from the dataloader.
include_mask: a boolean to indicate whether or not to include the mask
eval.
need_rescale_bboxes: If True, bboxes in `predictions` will be rescaled back
to absolute values (`image_info` is needed in this case).
"""
if annotation_file:
if annotation_file.startswith('gs://'):
_, local_val_json = tempfile.mkstemp(suffix='.json')
tf.io.gfile.remove(local_val_json)
tf.io.gfile.copy(annotation_file, local_val_json)
atexit.register(tf.io.gfile.remove, local_val_json)
else:
local_val_json = annotation_file
self._coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if include_mask else 'box'),
annotation_file=local_val_json)
self._annotation_file = annotation_file
self._include_mask = include_mask
self._metric_names = [
'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10',
'ARmax100', 'ARs', 'ARm', 'ARl'
]
self._required_prediction_fields = [
'source_id', 'num_detections', 'detection_classes', 'detection_scores',
'detection_boxes'
]
self._need_rescale_bboxes = need_rescale_bboxes
if self._need_rescale_bboxes:
self._required_prediction_fields.append('image_info')
self._required_groundtruth_fields = [
'source_id', 'height', 'width', 'classes', 'boxes'
]
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self._metric_names]
self._metric_names.extend(mask_metric_names)
self._required_prediction_fields.extend(['detection_masks'])
self._required_groundtruth_fields.extend(['masks'])
self.reset_states()
@property
def name(self):
return 'coco_metric'
def reset_states(self):
"""Resets internal states for a fresh run."""
self._predictions = {}
if not self._annotation_file:
self._groundtruths = {}
def result(self):
"""Evaluates detection results, and reset_states."""
metric_dict = self.evaluate()
# Cleans up the internal variables in order for a fresh eval next time.
self.reset_states()
return metric_dict
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
metrics_dict: a dictionary mapping each COCO-style metric name (box and,
  if enabled, mask metrics) to a float numpy scalar.
"""
if not self._annotation_file:
logging.info('No annotation file provided; building COCO groundtruths '
             'on the fly.')
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
logging.info('Using annotation file: %s', self._annotation_file)
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
mcoco_eval.params.imgIds = image_ids
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
return metrics_dict
def _process_predictions(self, predictions):
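    """Rescales detection boxes back to the original image space."""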
image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
predictions['detection_boxes'] = (
predictions['detection_boxes'].astype(np.float32))
predictions['detection_boxes'] /= image_scale
if 'detection_outer_boxes' in predictions:
predictions['detection_outer_boxes'] = (
predictions['detection_outer_boxes'].astype(np.float32))
predictions['detection_outer_boxes'] /= image_scale
def _convert_to_numpy(self, groundtruths, predictions):
"""Converts tesnors to numpy arrays."""
if groundtruths:
labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths)
numpy_groundtruths = {}
for key, val in labels.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_groundtruths[key] = val
else:
numpy_groundtruths = groundtruths
if predictions:
outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions)
numpy_predictions = {}
for key, val in outputs.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_predictions[key] = val
else:
numpy_predictions = predictions
return numpy_groundtruths, numpy_predictions
def update_state(self, groundtruths, predictions):
"""Update and aggregate detection results and groundtruth data.
Args:
groundtruths: a dictionary of Tensors including the fields below.
See also different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
- width: a numpy array of int of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- boxes: a numpy array of float of shape [batch_size, K, 4].
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the
  field is absent, the area is calculated using either boxes or
  masks depending on which one is available.
- masks: a numpy array of float of shape
  [batch_size, K, mask_height, mask_width].
predictions: a dictionary of tensors including the fields below.
See different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of
int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
- detection_masks: a numpy array of float of shape
[batch_size, K, mask_height, mask_width].
Raises:
ValueError: if the required prediction or groundtruth fields are not
present in the incoming `predictions` or `groundtruths`.
"""
groundtruths, predictions = self._convert_to_numpy(groundtruths,
predictions)
for k in self._required_prediction_fields:
if k not in predictions:
raise ValueError(
'Missing the required key `{}` in predictions!'.format(k))
if self._need_rescale_bboxes:
self._process_predictions(predictions)
for k, v in six.iteritems(predictions):
if k not in self._predictions:
self._predictions[k] = [v]
else:
self._predictions[k].append(v)
if not self._annotation_file:
assert groundtruths
for k in self._required_groundtruth_fields:
if k not in groundtruths:
raise ValueError(
'Missing the required key `{}` in groundtruths!'.format(k))
for k, v in six.iteritems(groundtruths):
if k not in self._groundtruths:
self._groundtruths[k] = [v]
else:
self._groundtruths[k].append(v)
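# A minimal usage sketch (an assumption, not part of this change): one full
# evaluation round with on-the-fly groundtruths. `eval_batches` is a
# hypothetical iterable yielding (groundtruths, predictions) dicts carrying
# the fields documented in `update_state` above.
def run_coco_eval_sketch(eval_batches, include_mask=False):
  evaluator = COCOEvaluator(annotation_file=None, include_mask=include_mask)
  for groundtruths, predictions in eval_batches:
    evaluator.update_state(groundtruths, predictions)
  # result() computes the metrics and resets state for the next eval round.
  return evaluator.result()  # e.g. metrics['AP'], metrics['AP50'].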
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for coco_evaluator."""
import io
import os
# Import libraries
from absl import logging
from absl.testing import absltest
from absl.testing import parameterized
import numpy as np
from PIL import Image
from pycocotools.coco import COCO
import six
import tensorflow as tf
from official.vision.beta.evaluation import coco_evaluator
from official.vision.beta.evaluation import coco_utils
_COCO_JSON_FILE = '/placer/prod/home/snaggletooth/test/data/coco/instances_val2017.json'
_SAVED_COCO_JSON_FILE = 'tmp.json'
def get_groundtruth_annotations(image_id, coco, include_mask=False):
anns = coco.loadAnns(coco.getAnnIds([image_id]))
if not anns:
return None
image = coco.loadImgs([image_id])[0]
groundtruths = {
'boxes': [],
'classes': [],
'is_crowds': [],
'areas': [],
}
if include_mask:
groundtruths['masks'] = []
for ann in anns:
# Converts COCO [x, y, w, h] boxes to [y1, x1, y2, x2] format.
box = [ann['bbox'][1],
ann['bbox'][0],
(ann['bbox'][1] + ann['bbox'][3]),
(ann['bbox'][0] + ann['bbox'][2])]
# Creates groundtruths.
groundtruths['boxes'].append(box)
groundtruths['classes'].append(ann['category_id'])
groundtruths['is_crowds'].append(ann['iscrowd'])
groundtruths['areas'].append(ann['area'])
if include_mask:
mask_img = Image.fromarray(coco.annToMask(ann).astype(np.uint8))
with io.BytesIO() as stream:
mask_img.save(stream, format='PNG')
mask_bytes = stream.getvalue()
groundtruths['masks'].append(mask_bytes)
for key, val in groundtruths.items():
groundtruths[key] = np.stack(val, axis=0)
groundtruths['source_id'] = image['id']
groundtruths['height'] = image['height']
groundtruths['width'] = image['width']
groundtruths['num_detections'] = len(anns)
for k, v in six.iteritems(groundtruths):
groundtruths[k] = np.expand_dims(v, axis=0)
return groundtruths
def get_predictions(image_id, coco, include_mask=False):
anns = coco.loadAnns(coco.getAnnIds([image_id]))
if not anns:
return None
image = coco.loadImgs([image_id])[0]
predictions = {
'detection_boxes': [],
'detection_classes': [],
'detection_scores': [],
}
if include_mask:
predictions['detection_masks'] = []
for ann in anns:
# Creates detections from groundtruths.
# Converts [x, y, w, h] to [y1, x1, y2, x2] box format and
# does the denormalization.
box = [ann['bbox'][1],
ann['bbox'][0],
(ann['bbox'][1] + ann['bbox'][3]),
(ann['bbox'][0] + ann['bbox'][2])]
predictions['detection_boxes'].append(box)
predictions['detection_classes'].append(ann['category_id'])
predictions['detection_scores'].append(1)
if include_mask:
mask = coco.annToMask(ann)
predictions['detection_masks'].append(mask)
for key, val in predictions.items():
predictions[key] = np.expand_dims(np.stack(val, axis=0), axis=0)
predictions['source_id'] = np.array([image['id']])
predictions['num_detections'] = np.array([len(anns)])
predictions['image_info'] = np.array(
[[[image['height'], image['width']],
[image['height'], image['width']],
[1, 1],
[0, 0]]], dtype=np.float32)
return predictions
def get_fake_predictions(image_id, coco, include_mask=False):
anns = coco.loadAnns(coco.getAnnIds([image_id]))
if not anns:
return None
label_id_max = max([ann['category_id'] for ann in anns])
image = coco.loadImgs([image_id])[0]
num_detections = 100
xmin = np.random.randint(
low=0, high=int(image['width'] / 2), size=(1, num_detections))
xmax = np.random.randint(
low=int(image['width'] / 2), high=image['width'],
size=(1, num_detections))
ymin = np.random.randint(
low=0, high=int(image['height'] / 2), size=(1, num_detections))
ymax = np.random.randint(
low=int(image['height'] / 2), high=image['height'],
size=(1, num_detections))
predictions = {
'detection_boxes': np.stack([ymin, xmin, ymax, xmax], axis=-1),
'detection_classes': np.random.randint(
low=0, high=(label_id_max + 1), size=(1, num_detections)),
'detection_scores': np.random.random(size=(1, num_detections)),
}
if include_mask:
predictions['detection_masks'] = np.random.randint(
1, size=(1, num_detections, image['height'], image['width']))
predictions['source_id'] = np.array([image['id']])
predictions['num_detections'] = np.array([num_detections])
predictions['image_info'] = np.array(
[[[image['height'], image['width']],
[image['height'], image['width']],
[1, 1],
[0, 0]]], dtype=np.float32)
return predictions
class DummyGroundtruthGenerator(object):
def __init__(self, include_mask, image_id, coco):
self._include_mask = include_mask
self._image_id = image_id
self._coco = coco
def __call__(self):
yield get_groundtruth_annotations(
self._image_id, self._coco, self._include_mask)
class COCOEvaluatorTest(parameterized.TestCase, absltest.TestCase):
def setUp(self):
super(COCOEvaluatorTest, self).setUp()
temp = self.create_tempdir()
self._saved_coco_json_file = os.path.join(temp.full_path,
_SAVED_COCO_JSON_FILE)
def tearDown(self):
super(COCOEvaluatorTest, self).tearDown()
@parameterized.parameters(
(False, False), (False, True), (True, False), (True, True))
def testEval(self, include_mask, use_fake_predictions):
coco = COCO(annotation_file=_COCO_JSON_FILE)
index = np.random.randint(len(coco.dataset['images']))
image_id = coco.dataset['images'][index]['id']
if use_fake_predictions:
predictions = get_fake_predictions(
image_id, coco, include_mask=include_mask)
else:
predictions = get_predictions(image_id, coco, include_mask=include_mask)
if not predictions:
logging.info('Empty predictions for index=%d', index)
return
predictions = tf.nest.map_structure(
lambda x: tf.convert_to_tensor(x) if x is not None else None,
predictions)
evaluator_w_json = coco_evaluator.COCOEvaluator(
annotation_file=_COCO_JSON_FILE, include_mask=include_mask)
evaluator_w_json.update_state(groundtruths=None, predictions=predictions)
results_w_json = evaluator_w_json.result()
dummy_generator = DummyGroundtruthGenerator(
include_mask=include_mask, image_id=image_id, coco=coco)
coco_utils.generate_annotation_file(dummy_generator,
self._saved_coco_json_file)
evaluator_no_json = coco_evaluator.COCOEvaluator(
annotation_file=self._saved_coco_json_file, include_mask=include_mask)
evaluator_no_json.update_state(groundtruths=None, predictions=predictions)
results_no_json = evaluator_no_json.result()
for k, v in results_w_json.items():
self.assertEqual(v, results_no_json[k])
@parameterized.parameters(
(False, False), (False, True), (True, False), (True, True))
def testEvalOnTheFly(self, include_mask, use_fake_predictions):
coco = COCO(annotation_file=_COCO_JSON_FILE)
index = np.random.randint(len(coco.dataset['images']))
image_id = coco.dataset['images'][index]['id']
if use_fake_predictions:
predictions = get_fake_predictions(
image_id, coco, include_mask=include_mask)
else:
predictions = get_predictions(image_id, coco, include_mask=include_mask)
if not predictions:
logging.info('Empty predictions for index=%d', index)
return
predictions = tf.nest.map_structure(
lambda x: tf.convert_to_tensor(x) if x is not None else None,
predictions)
evaluator_w_json = coco_evaluator.COCOEvaluator(
annotation_file=_COCO_JSON_FILE, include_mask=include_mask)
evaluator_w_json.update_state(groundtruths=None, predictions=predictions)
results_w_json = evaluator_w_json.result()
groundtruths = get_groundtruth_annotations(image_id, coco, include_mask)
groundtruths = tf.nest.map_structure(
lambda x: tf.convert_to_tensor(x) if x is not None else None,
groundtruths)
evaluator_no_json = coco_evaluator.COCOEvaluator(
annotation_file=None, include_mask=include_mask)
evaluator_no_json.update_state(groundtruths, predictions)
results_no_json = evaluator_no_json.result()
for k, v in results_w_json.items():
self.assertEqual(v, results_no_json[k])
if __name__ == '__main__':
absltest.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Util functions related to pycocotools and COCO eval."""
import copy
import json
# Import libraries
from absl import logging
import numpy as np
from PIL import Image
from pycocotools import coco
from pycocotools import mask as mask_api
import six
import tensorflow as tf
from official.vision.beta.dataloaders import tf_example_decoder
from official.vision.beta.ops import box_ops
from official.vision.beta.ops import mask_ops
class COCOWrapper(coco.COCO):
"""COCO wrapper class.
This class wraps COCO API object, which provides the following additional
functionalities:
1. Support string type image id.
2. Support loading the groundtruth dataset using the external annotation
dictionary.
3. Support loading the prediction results using the external annotation
dictionary.
"""
def __init__(self, eval_type='box', annotation_file=None, gt_dataset=None):
"""Instantiates a COCO-style API object.
Args:
eval_type: either 'box' or 'mask'.
annotation_file: a JSON file that stores annotations of the eval dataset.
This is required if `gt_dataset` is not provided.
gt_dataset: the groundtruth eval dataset in COCO API format.
"""
if ((annotation_file and gt_dataset) or
((not annotation_file) and (not gt_dataset))):
raise ValueError('One and only one of `annotation_file` and `gt_dataset` '
'needs to be specified.')
if eval_type not in ['box', 'mask']:
raise ValueError('The `eval_type` can only be either `box` or `mask`.')
coco.COCO.__init__(self, annotation_file=annotation_file)
self._eval_type = eval_type
if gt_dataset:
self.dataset = gt_dataset
self.createIndex()
def loadRes(self, predictions):
"""Loads result file and return a result api object.
Args:
predictions: a list of dictionary each representing an annotation in COCO
format. The required fields are `image_id`, `category_id`, `score`,
`bbox`, `segmentation`.
Returns:
res: result COCO api object.
Raises:
ValueError: if the set of image ids from predictions is not a subset of
  the image ids of the groundtruth dataset.
"""
res = coco.COCO()
res.dataset['images'] = copy.deepcopy(self.dataset['images'])
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
image_ids = [ann['image_id'] for ann in predictions]
if set(image_ids) != (set(image_ids) & set(self.getImgIds())):
raise ValueError('Results do not correspond to the current dataset!')
for ann in predictions:
x1, x2, y1, y2 = [ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2],
ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]]
if self._eval_type == 'box':
ann['area'] = ann['bbox'][2] * ann['bbox'][3]
ann['segmentation'] = [
[x1, y1, x1, y2, x2, y2, x2, y1]]
elif self._eval_type == 'mask':
ann['area'] = mask_api.area(ann['segmentation'])
res.dataset['annotations'] = copy.deepcopy(predictions)
res.createIndex()
return res
def convert_predictions_to_coco_annotations(predictions):
"""Converts a batch of predictions to annotations in COCO format.
Args:
predictions: a dictionary of lists of numpy arrays including the following
fields. K below denotes the maximum number of instances per image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- detection_boxes: a list of numpy arrays of float of shape
[batch_size, K, 4], where coordinates are in the original image
space (not the scaled image space).
- detection_classes: a list of numpy arrays of int of shape
[batch_size, K].
- detection_scores: a list of numpy arrays of float of shape
[batch_size, K].
Optional fields:
- detection_masks: a list of numpy arrays of float of shape
[batch_size, K, mask_height, mask_width].
Returns:
coco_predictions: prediction in COCO annotation format.
"""
coco_predictions = []
num_batches = len(predictions['source_id'])
batch_size = predictions['source_id'][0].shape[0]
max_num_detections = predictions['detection_classes'][0].shape[1]
use_outer_box = 'detection_outer_boxes' in predictions
for i in range(num_batches):
predictions['detection_boxes'][i] = box_ops.yxyx_to_xywh(
predictions['detection_boxes'][i])
if use_outer_box:
predictions['detection_outer_boxes'][i] = box_ops.yxyx_to_xywh(
predictions['detection_outer_boxes'][i])
mask_boxes = predictions['detection_outer_boxes']
else:
mask_boxes = predictions['detection_boxes']
for j in range(batch_size):
if 'detection_masks' in predictions:
image_masks = mask_ops.paste_instance_masks(
predictions['detection_masks'][i][j],
mask_boxes[i][j],
int(predictions['image_info'][i][j, 0, 0]),
int(predictions['image_info'][i][j, 0, 1]))
binary_masks = (image_masks > 0.0).astype(np.uint8)
encoded_masks = [
mask_api.encode(np.asfortranarray(binary_mask))
for binary_mask in list(binary_masks)]
for k in range(max_num_detections):
ann = {}
ann['image_id'] = predictions['source_id'][i][j]
ann['category_id'] = predictions['detection_classes'][i][j, k]
ann['bbox'] = predictions['detection_boxes'][i][j, k]
ann['score'] = predictions['detection_scores'][i][j, k]
if 'detection_masks' in predictions:
ann['segmentation'] = encoded_masks[k]
coco_predictions.append(ann)
for i, ann in enumerate(coco_predictions):
ann['id'] = i + 1
return coco_predictions
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
"""Converts groundtruths to the dataset in COCO format.
Args:
groundtruths: a dictionary of lists of numpy arrays including the fields
  below, where each element in a list holds the values for one batch.
  K below denotes the (padded) maximum number of instances per image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- height: a list of numpy arrays of int of shape [batch_size].
- width: a list of numpy arrays of int of shape [batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
where coordinates are in the original image space (not the
normalized coordinates).
- classes: a list of numpy arrays of int of shape [batch_size, K].
Optional fields:
- is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If
the field is absent, it is assumed that this instance is not crowd.
- areas: a list of numpy arrays of float of shape [batch_size, K]. If the
field is absent, the area is calculated using either boxes or
masks depending on which one is available.
- masks: a list of numpy arrays of string of shape [batch_size, K].
label_map: (optional) a dictionary that maps category ids to category
  names. If `None`, the category mapping is collected from the
  `groundtruths`.
Returns:
coco_groundtruths: the groundtruth dataset in COCO format.
"""
source_ids = np.concatenate(groundtruths['source_id'], axis=0)
heights = np.concatenate(groundtruths['height'], axis=0)
widths = np.concatenate(groundtruths['width'], axis=0)
gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)} for i, h, w
in zip(source_ids, heights, widths)]
gt_annotations = []
num_batches = len(groundtruths['source_id'])
batch_size = groundtruths['source_id'][0].shape[0]
for i in range(num_batches):
for j in range(batch_size):
num_instances = groundtruths['num_detections'][i][j]
for k in range(int(num_instances)):
ann = {}
ann['image_id'] = int(groundtruths['source_id'][i][j])
if 'is_crowds' in groundtruths:
ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
else:
ann['iscrowd'] = 0
ann['category_id'] = int(groundtruths['classes'][i][j, k])
boxes = groundtruths['boxes'][i]
ann['bbox'] = [
float(boxes[j, k, 1]),
float(boxes[j, k, 0]),
float(boxes[j, k, 3] - boxes[j, k, 1]),
float(boxes[j, k, 2] - boxes[j, k, 0])]
if 'areas' in groundtruths:
ann['area'] = float(groundtruths['areas'][i][j, k])
else:
ann['area'] = float(
(boxes[j, k, 3] - boxes[j, k, 1]) *
(boxes[j, k, 2] - boxes[j, k, 0]))
if 'masks' in groundtruths:
mask = Image.open(six.BytesIO(groundtruths['masks'][i][j, k]))
width, height = mask.size
np_mask = (
np.array(mask.getdata()).reshape(height, width).astype(np.uint8))
np_mask[np_mask > 0] = 255
encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask
if 'areas' not in groundtruths:
ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann)
for i, ann in enumerate(gt_annotations):
ann['id'] = i + 1
if label_map:
gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
else:
category_ids = [gt['category_id'] for gt in gt_annotations]
gt_categories = [{'id': i} for i in set(category_ids)]
gt_dataset = {
'images': gt_images,
'categories': gt_categories,
'annotations': copy.deepcopy(gt_annotations),
}
return gt_dataset
class COCOGroundtruthGenerator:
"""Generates the groundtruth annotations from a single example."""
def __init__(self, file_pattern, num_examples, include_mask):
self._file_pattern = file_pattern
self._num_examples = num_examples
self._include_mask = include_mask
self._dataset_fn = tf.data.TFRecordDataset
def _parse_single_example(self, example):
"""Parses a single serialized tf.Example proto.
Args:
example: a serialized tf.Example proto string.
Returns:
A dictionary of groundtruth with the following fields:
source_id: a scalar tensor of int64 representing the image source_id.
height: a scalar tensor of int64 representing the image height.
width: a scalar tensor of int64 representing the image width.
boxes: a float tensor of shape [K, 4], representing the groundtruth
boxes in absolute coordinates with respect to the original image size.
classes: an int64 tensor of shape [K], representing the class labels of
  each instance.
is_crowds: a bool tensor of shape [K], indicating whether the instance
is crowd.
areas: a float tensor of shape [K], indicating the area of each
instance.
masks: a string tensor of shape [K], containing the bytes of the png
mask of each instance.
"""
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=self._include_mask)
decoded_tensors = decoder.decode(example)
image = decoded_tensors['image']
image_size = tf.shape(image)[0:2]
boxes = box_ops.denormalize_boxes(
decoded_tensors['groundtruth_boxes'], image_size)
groundtruths = {
'source_id': tf.strings.to_number(
    decoded_tensors['source_id'], out_type=tf.int64),
'height': decoded_tensors['height'],
'width': decoded_tensors['width'],
'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
'boxes': boxes,
'classes': decoded_tensors['groundtruth_classes'],
'is_crowds': decoded_tensors['groundtruth_is_crowd'],
'areas': decoded_tensors['groundtruth_area'],
}
if self._include_mask:
groundtruths.update({
'masks': decoded_tensors['groundtruth_instance_masks_png'],
})
return groundtruths
def _build_pipeline(self):
"""Builds data pipeline to generate groundtruth annotations."""
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.interleave(
map_func=lambda filename: self._dataset_fn(filename).prefetch(1),
cycle_length=12,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.map(self._parse_single_example,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(1, drop_remainder=False)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
return dataset
def __call__(self):
for groundtruth_result in self._build_pipeline():
yield groundtruth_result
def scan_and_generator_annotation_file(file_pattern: str,
num_samples: int,
include_mask: bool,
annotation_file: str):
"""Scans and generate the COCO-style annotation JSON file given a dataset."""
groundtruth_generator = COCOGroundtruthGenerator(
file_pattern, num_samples, include_mask)
generate_annotation_file(groundtruth_generator, annotation_file)
def generate_annotation_file(groundtruth_generator,
annotation_file):
"""Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths = {}
logging.info('Loading groundtruth annotations from dataset to memory...')
for groundtruth in groundtruth_generator():
for k, v in six.iteritems(groundtruth):
if k not in groundtruths:
groundtruths[k] = [v]
else:
groundtruths[k].append(v)
gt_dataset = convert_groundtruths_to_coco_dataset(groundtruths)
logging.info('Saving groundtruth annotations to the JSON file...')
with tf.io.gfile.GFile(annotation_file, 'w') as f:
f.write(json.dumps(gt_dataset))
logging.info('Done saving the JSON file...')
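# A minimal sketch (illustrative, not part of this change) of
# `generate_annotation_file` with an in-memory generator. The single-image
# groundtruth below is hypothetical: it carries a leading batch dimension of
# 1 and uses absolute [y1, x1, y2, x2] boxes, as expected by
# `convert_groundtruths_to_coco_dataset`.
def _tiny_groundtruth_generator():
  yield {
      'source_id': np.array([1]),
      'height': np.array([640]),
      'width': np.array([480]),
      'num_detections': np.array([1]),
      'boxes': np.array([[[10.0, 20.0, 110.0, 220.0]]]),  # [1, K=1, 4]
      'classes': np.array([[1]]),  # [1, K=1]
  }
# Writing the file is left commented out to keep the module import-safe;
# the output path below is a placeholder:
# generate_annotation_file(_tiny_groundtruth_generator, '/tmp/tiny_coco.json')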
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Backbones package definition."""
from official.vision.beta.modeling.backbones.efficientnet import EfficientNet
from official.vision.beta.modeling.backbones.resnet import ResNet
from official.vision.beta.modeling.backbones.resnet_3d import ResNet3D
from official.vision.beta.modeling.backbones.revnet import RevNet
from official.vision.beta.modeling.backbones.spinenet import SpineNet
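# A minimal usage sketch (an assumption, not part of the package
# definition): constructing a backbone from this package and running a dummy
# forward pass, mirroring the EfficientNet tests; the 224x224 input size is
# a placeholder.
def _example_backbone_forward_sketch():
  import tensorflow as tf  # Local import to keep the package import light.
  network = EfficientNet(model_id='b0')
  inputs = tf.keras.Input(shape=(224, 224, 3), batch_size=1)
  # Backbones return a dict of feature maps keyed by pyramid level.
  return network(inputs)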
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for EfficientNet."""
# Import libraries
from absl.testing import parameterized
import tensorflow as tf
from official.vision.beta.modeling.backbones import efficientnet
class EfficientNetTest(parameterized.TestCase, tf.test.TestCase):
@parameterized.parameters(32, 224)
def test_network_creation(self, input_size):
"""Test creation of EfficientNet family models."""
tf.keras.backend.set_image_data_format('channels_last')
network = efficientnet.EfficientNet(model_id='b0')
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
endpoints = network(inputs)
self.assertAllEqual([1, input_size // 2**2, input_size // 2**2, 24],
                    endpoints[2].shape.as_list())
self.assertAllEqual([1, input_size // 2**3, input_size // 2**3, 40],
                    endpoints[3].shape.as_list())
self.assertAllEqual([1, input_size // 2**4, input_size // 2**4, 112],
                    endpoints[4].shape.as_list())
self.assertAllEqual([1, input_size // 2**5, input_size // 2**5, 320],
                    endpoints[5].shape.as_list())
@parameterized.parameters('b0', 'b3', 'b6')
def test_network_scaling(self, model_id):
"""Test compound scaling."""
efficientnet_params = {
'b0': 4049564,
'b3': 10783528,
'b6': 40960136,
}
tf.keras.backend.set_image_data_format('channels_last')
input_size = 32
network = efficientnet.EfficientNet(model_id=model_id, se_ratio=0.25)
self.assertEqual(network.count_params(), efficientnet_params[model_id])
inputs = tf.keras.Input(shape=(input_size, input_size, 3), batch_size=1)
_ = network(inputs)
@parameterized.parameters(1, 3)
def test_input_specs(self, input_dim):
"""Test different input feature dimensions."""
tf.keras.backend.set_image_data_format('channels_last')
input_specs = tf.keras.layers.InputSpec(shape=[None, None, None, input_dim])
network = efficientnet.EfficientNet(model_id='b0', input_specs=input_specs)
inputs = tf.keras.Input(shape=(128, 128, input_dim), batch_size=1)
_ = network(inputs)
def test_serialize_deserialize(self):
# Create a network object that sets all of its config options.
kwargs = dict(
model_id='b0',
se_ratio=0.25,
stochastic_depth_drop_rate=None,
use_sync_bn=False,
kernel_initializer='VarianceScaling',
kernel_regularizer=None,
bias_regularizer=None,
activation='relu',
norm_momentum=0.99,
norm_epsilon=0.001,
)
network = efficientnet.EfficientNet(**kwargs)
expected_config = dict(kwargs)
self.assertEqual(network.get_config(), expected_config)
# Create another network object from the first object's config.
new_network = efficientnet.EfficientNet.from_config(network.get_config())
# Validate that the config can be forced to JSON.
_ = new_network.to_json()
# If the serialization was successful, the new config should match the old.
self.assertAllEqual(network.get_config(), new_network.get_config())
if __name__ == '__main__':
tf.test.main()