Unverified Commit 0225b135 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'tensorflow:master' into panoptic-deeplab-modeling

parents 7479dbb8 4c571a3c
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The COCO-style evaluator.

The following snippet demonstrates the use of interfaces:

  evaluator = COCOEvaluator(...)
  for _ in range(num_evals):
    for _ in range(num_batches_per_eval):
      groundtruths, predictions = predictor.predict(...)  # pop a batch.
      evaluator.update_state(groundtruths, predictions)
    evaluator.result()  # finish one full eval and reset states.

See also: https://github.com/cocodataset/cocoapi/
"""
import atexit
import tempfile
# Import libraries
from absl import logging
import numpy as np
from pycocotools import cocoeval
import six
import tensorflow as tf
from official.vision.evaluation import coco_utils
class COCOEvaluator(object):
"""COCO evaluation metric class."""
def __init__(self,
annotation_file,
include_mask,
need_rescale_bboxes=True,
per_category_metrics=False):
"""Constructs COCO evaluation class.
The class provides the interface to COCO metrics_fn. The
_update_op() takes detections from each image and pushes them to
self.detections. The _evaluate() loads a JSON file in COCO annotation format
as the groundtruths and runs COCO evaluation.
Args:
annotation_file: a JSON file that stores annotations of the eval dataset.
If `annotation_file` is None, groundtruth annotations will be loaded
from the dataloader.
include_mask: a boolean to indicate whether or not to include the mask
eval.
need_rescale_bboxes: If true, bboxes in `predictions` will be rescaled back
to absolute values (`image_info` is needed in this case).
per_category_metrics: Whether to return per category metrics.
"""
if annotation_file:
if annotation_file.startswith('gs://'):
_, local_val_json = tempfile.mkstemp(suffix='.json')
tf.io.gfile.remove(local_val_json)
tf.io.gfile.copy(annotation_file, local_val_json)
atexit.register(tf.io.gfile.remove, local_val_json)
else:
local_val_json = annotation_file
self._coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if include_mask else 'box'),
annotation_file=local_val_json)
self._annotation_file = annotation_file
self._include_mask = include_mask
self._per_category_metrics = per_category_metrics
self._metric_names = [
'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1', 'ARmax10',
'ARmax100', 'ARs', 'ARm', 'ARl'
]
self._required_prediction_fields = [
'source_id', 'num_detections', 'detection_classes', 'detection_scores',
'detection_boxes'
]
self._need_rescale_bboxes = need_rescale_bboxes
if self._need_rescale_bboxes:
self._required_prediction_fields.append('image_info')
self._required_groundtruth_fields = [
'source_id', 'height', 'width', 'classes', 'boxes'
]
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self._metric_names]
self._metric_names.extend(mask_metric_names)
self._required_prediction_fields.extend(['detection_masks'])
self._required_groundtruth_fields.extend(['masks'])
self.reset_states()
@property
def name(self):
return 'coco_metric'
def reset_states(self):
"""Resets internal states for a fresh run."""
self._predictions = {}
if not self._annotation_file:
self._groundtruths = {}
def result(self):
"""Evaluates detection results and resets states."""
metric_dict = self.evaluate()
# Cleans up the internal variables in order for a fresh eval next time.
self.reset_states()
return metric_dict
def evaluate(self):
"""Evaluates with detections from all images with COCO API.
Returns:
metrics_dict: a dictionary mapping each metric name to a float value,
covering box metrics and, if `include_mask` is True, mask metrics.
"""
if not self._annotation_file:
logging.info('There is no annotation_file in COCOEvaluator.')
gt_dataset = coco_utils.convert_groundtruths_to_coco_dataset(
self._groundtruths)
coco_gt = coco_utils.COCOWrapper(
eval_type=('mask' if self._include_mask else 'box'),
gt_dataset=gt_dataset)
else:
logging.info('Using annotation file: %s', self._annotation_file)
coco_gt = self._coco_gt
coco_predictions = coco_utils.convert_predictions_to_coco_annotations(
self._predictions)
coco_dt = coco_gt.loadRes(predictions=coco_predictions)
image_ids = [ann['image_id'] for ann in coco_predictions]
coco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
mcoco_eval = cocoeval.COCOeval(coco_gt, coco_dt, iouType='segm')
mcoco_eval.params.imgIds = image_ids
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
metrics_dict = {}
for i, name in enumerate(self._metric_names):
metrics_dict[name] = metrics[i].astype(np.float32)
# Adds metrics per category.
if self._per_category_metrics:
metrics_dict.update(self._retrieve_per_category_metrics(coco_eval))
if self._include_mask:
metrics_dict.update(self._retrieve_per_category_metrics(
mcoco_eval, prefix='mask'))
return metrics_dict
def _retrieve_per_category_metrics(self, coco_eval, prefix=''):
"""Retrieves per-category metrics and returns them in a dict.
Args:
coco_eval: a cocoeval.COCOeval object containing evaluation data.
prefix: str, a string used to prefix metric names.
Returns:
metrics_dict: A dictionary with per category metrics.
"""
metrics_dict = {}
if prefix:
prefix = prefix + ' '
if hasattr(coco_eval, 'category_stats'):
for category_index, category_id in enumerate(coco_eval.params.catIds):
if self._annotation_file:
coco_category = self._coco_gt.cats[category_id]
# if 'name' is available use it, otherwise use `id`
category_display_name = coco_category.get('name', category_id)
else:
category_display_name = category_id
metrics_dict[prefix + 'Precision mAP ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[0][category_index].astype(np.float32)
metrics_dict[prefix + 'Precision mAP ByCategory@50IoU/{}'.format(
category_display_name
)] = coco_eval.category_stats[1][category_index].astype(np.float32)
metrics_dict[prefix + 'Precision mAP ByCategory@75IoU/{}'.format(
category_display_name
)] = coco_eval.category_stats[2][category_index].astype(np.float32)
metrics_dict[prefix + 'Precision mAP ByCategory (small) /{}'.format(
category_display_name
)] = coco_eval.category_stats[3][category_index].astype(np.float32)
metrics_dict[prefix + 'Precision mAP ByCategory (medium) /{}'.format(
category_display_name
)] = coco_eval.category_stats[4][category_index].astype(np.float32)
metrics_dict[prefix + 'Precision mAP ByCategory (large) /{}'.format(
category_display_name
)] = coco_eval.category_stats[5][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR@1 ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[6][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR@10 ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[7][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR@100 ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[8][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR (small) ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[9][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR (medium) ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[10][category_index].astype(np.float32)
metrics_dict[prefix + 'Recall AR (large) ByCategory/{}'.format(
category_display_name
)] = coco_eval.category_stats[11][category_index].astype(np.float32)
return metrics_dict
def _process_predictions(self, predictions):
image_scale = np.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
predictions['detection_boxes'] = (
predictions['detection_boxes'].astype(np.float32))
predictions['detection_boxes'] /= image_scale
if 'detection_outer_boxes' in predictions:
predictions['detection_outer_boxes'] = (
predictions['detection_outer_boxes'].astype(np.float32))
predictions['detection_outer_boxes'] /= image_scale
def _convert_to_numpy(self, groundtruths, predictions):
"""Converts tensors to numpy arrays."""
if groundtruths:
labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths)
numpy_groundtruths = {}
for key, val in labels.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_groundtruths[key] = val
else:
numpy_groundtruths = groundtruths
if predictions:
outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions)
numpy_predictions = {}
for key, val in outputs.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_predictions[key] = val
else:
numpy_predictions = predictions
return numpy_groundtruths, numpy_predictions
def update_state(self, groundtruths, predictions):
"""Update and aggregate detection results and groundtruth data.
Args:
groundtruths: a dictionary of Tensors including the fields below.
See also different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- height: a numpy array of int of shape [batch_size].
- width: a numpy array of int of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- boxes: a numpy array of float of shape [batch_size, K, 4].
- classes: a numpy array of int of shape [batch_size, K].
Optional fields:
- is_crowds: a numpy array of int of shape [batch_size, K]. If the
field is absent, it is assumed that this instance is not crowd.
- areas: a numpy array of float of shape [batch_size, K]. If the
field is absent, the area is calculated using either boxes or
masks depending on which one is available.
- masks: a numpy array of float of shape
[batch_size, K, mask_height, mask_width],
predictions: a dictionary of tensors including the fields below.
See different parsers under `../dataloader` for more details.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info [if `need_rescale_bboxes` is True]: a numpy array of
float of shape [batch_size, 4, 2].
- num_detections: a numpy array of
int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
Optional fields:
- detection_masks: a numpy array of float of shape
[batch_size, K, mask_height, mask_width].
Raises:
ValueError: if the required prediction or groundtruth fields are not
present in the incoming `predictions` or `groundtruths`.
"""
groundtruths, predictions = self._convert_to_numpy(groundtruths,
predictions)
for k in self._required_prediction_fields:
if k not in predictions:
raise ValueError(
'Missing the required key `{}` in predictions!'.format(k))
if self._need_rescale_bboxes:
self._process_predictions(predictions)
for k, v in six.iteritems(predictions):
if k not in self._predictions:
self._predictions[k] = [v]
else:
self._predictions[k].append(v)
if not self._annotation_file:
assert groundtruths
for k in self._required_groundtruth_fields:
if k not in groundtruths:
raise ValueError(
'Missing the required key `{}` in groundtruths!'.format(k))
for k, v in six.iteritems(groundtruths):
if k not in self._groundtruths:
self._groundtruths[k] = [v]
else:
self._groundtruths[k].append(v)
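

A minimal end-to-end sketch of the interface above (illustrative only, not part of the module; all names and values are placeholders). It assumes TF2 eager execution and this module's imports, with groundtruths supplied from the inputs rather than from an annotation file; pycocotools prints its standard summary when `result()` runs:

evaluator = COCOEvaluator(
    annotation_file=None, include_mask=False, need_rescale_bboxes=False)
example_groundtruths = {
    'source_id': tf.constant([1], tf.int64),
    'height': tf.constant([100], tf.int64),
    'width': tf.constant([100], tf.int64),
    'num_detections': tf.constant([1], tf.int64),
    'boxes': tf.constant([[[10., 20., 50., 60.]]]),  # [y1, x1, y2, x2]
    'classes': tf.constant([[3]], tf.int64),
}
example_predictions = {
    'source_id': tf.constant([1], tf.int64),
    'num_detections': tf.constant([1], tf.int64),
    'detection_boxes': tf.constant([[[10., 20., 50., 60.]]]),
    'detection_classes': tf.constant([[3]], tf.int64),
    'detection_scores': tf.constant([[0.9]]),
}
evaluator.update_state(example_groundtruths, example_predictions)
metrics = evaluator.result()  # e.g. metrics['AP'] is ~1.0 for this exact match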
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Util functions related to pycocotools and COCO eval."""
import copy
import json
# Import libraries
from absl import logging
import numpy as np
from PIL import Image
from pycocotools import coco
from pycocotools import mask as mask_api
import six
import tensorflow as tf
from official.common import dataset_fn
from official.vision.dataloaders import tf_example_decoder
from official.vision.ops import box_ops
from official.vision.ops import mask_ops
class COCOWrapper(coco.COCO):
"""COCO wrapper class.
This class wraps the COCO API object and provides the following additional
functionalities:
1. Support string type image id.
2. Support loading the groundtruth dataset using the external annotation
dictionary.
3. Support loading the prediction results using the external annotation
dictionary.
"""
def __init__(self, eval_type='box', annotation_file=None, gt_dataset=None):
"""Instantiates a COCO-style API object.
Args:
eval_type: either 'box' or 'mask'.
annotation_file: a JSON file that stores annotations of the eval dataset.
This is required if `gt_dataset` is not provided.
gt_dataset: the groundtruth eval dataset in COCO API format.
"""
if ((annotation_file and gt_dataset) or
((not annotation_file) and (not gt_dataset))):
raise ValueError('One and only one of `annotation_file` and `gt_dataset` '
'needs to be specified.')
if eval_type not in ['box', 'mask']:
raise ValueError('The `eval_type` can only be either `box` or `mask`.')
coco.COCO.__init__(self, annotation_file=annotation_file)
self._eval_type = eval_type
if gt_dataset:
self.dataset = gt_dataset
self.createIndex()
def loadRes(self, predictions):
"""Loads result file and returns a result API object.
Args:
predictions: a list of dictionary each representing an annotation in COCO
format. The required fields are `image_id`, `category_id`, `score`,
`bbox`, `segmentation`.
Returns:
res: result COCO api object.
Raises:
ValueError: if the set of image ids from predictions is not a subset of
the set of image ids in the groundtruth dataset.
"""
res = coco.COCO()
res.dataset['images'] = copy.deepcopy(self.dataset['images'])
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
image_ids = [ann['image_id'] for ann in predictions]
if set(image_ids) != (set(image_ids) & set(self.getImgIds())):
raise ValueError('Results do not correspond to the current dataset!')
for ann in predictions:
x1, x2, y1, y2 = [ann['bbox'][0], ann['bbox'][0] + ann['bbox'][2],
ann['bbox'][1], ann['bbox'][1] + ann['bbox'][3]]
if self._eval_type == 'box':
ann['area'] = ann['bbox'][2] * ann['bbox'][3]
ann['segmentation'] = [
[x1, y1, x1, y2, x2, y2, x2, y1]]
elif self._eval_type == 'mask':
ann['area'] = mask_api.area(ann['segmentation'])
res.dataset['annotations'] = copy.deepcopy(predictions)
res.createIndex()
return res
def convert_predictions_to_coco_annotations(predictions):
"""Converts a batch of predictions to annotations in COCO format.
Args:
predictions: a dictionary of lists of numpy arrays including the following
fields. K below denotes the maximum number of instances per image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- detection_boxes: a list of numpy arrays of float of shape
[batch_size, K, 4], where coordinates are in the original image
space (not the scaled image space).
- detection_classes: a list of numpy arrays of int of shape
[batch_size, K].
- detection_scores: a list of numpy arrays of float of shape
[batch_size, K].
Optional fields:
- detection_masks: a list of numpy arrays of float of shape
[batch_size, K, mask_height, mask_width].
Returns:
coco_predictions: prediction in COCO annotation format.
"""
coco_predictions = []
num_batches = len(predictions['source_id'])
max_num_detections = predictions['detection_classes'][0].shape[1]
use_outer_box = 'detection_outer_boxes' in predictions
for i in range(num_batches):
predictions['detection_boxes'][i] = box_ops.yxyx_to_xywh(
predictions['detection_boxes'][i])
if use_outer_box:
predictions['detection_outer_boxes'][i] = box_ops.yxyx_to_xywh(
predictions['detection_outer_boxes'][i])
mask_boxes = predictions['detection_outer_boxes']
else:
mask_boxes = predictions['detection_boxes']
batch_size = predictions['source_id'][i].shape[0]
for j in range(batch_size):
if 'detection_masks' in predictions:
image_masks = mask_ops.paste_instance_masks(
predictions['detection_masks'][i][j],
mask_boxes[i][j],
int(predictions['image_info'][i][j, 0, 0]),
int(predictions['image_info'][i][j, 0, 1]))
binary_masks = (image_masks > 0.0).astype(np.uint8)
encoded_masks = [
mask_api.encode(np.asfortranarray(binary_mask))
for binary_mask in list(binary_masks)]
for k in range(max_num_detections):
ann = {}
ann['image_id'] = predictions['source_id'][i][j]
ann['category_id'] = predictions['detection_classes'][i][j, k]
ann['bbox'] = predictions['detection_boxes'][i][j, k]
ann['score'] = predictions['detection_scores'][i][j, k]
if 'detection_masks' in predictions:
ann['segmentation'] = encoded_masks[k]
coco_predictions.append(ann)
for i, ann in enumerate(coco_predictions):
ann['id'] = i + 1
return coco_predictions
def convert_groundtruths_to_coco_dataset(groundtruths, label_map=None):
"""Converts groundtruths to the dataset in COCO format.
Args:
groundtruths: a dictionary of lists of numpy arrays including the fields
below. Note that each element in a list corresponds to one batch of
examples. K below denotes the actual number of instances for each image.
Required fields:
- source_id: a list of numpy arrays of int or string of shape
[batch_size].
- height: a list of numpy arrays of int of shape [batch_size].
- width: a list of numpy arrays of int of shape [batch_size].
- num_detections: a list of numpy arrays of int of shape [batch_size].
- boxes: a list of numpy arrays of float of shape [batch_size, K, 4],
where coordinates are in the original image space (not the
normalized coordinates).
- classes: a list of numpy arrays of int of shape [batch_size, K].
Optional fields:
- is_crowds: a list of numpy arrays of int of shape [batch_size, K]. If
the field is absent, it is assumed that this instance is not crowd.
- areas: a list of numpy arrays of float of shape [batch_size, K]. If the
field is absent, the area is calculated using either boxes or
masks depending on which one is available.
- masks: a list of numpy arrays of string of shape [batch_size, K],
label_map: (optional) a dictionary that defines items from the category id
to the category name. If `None`, the category mapping is collected from the
`groundtruths`.
Returns:
coco_groundtruths: the groundtruth dataset in COCO format.
"""
source_ids = np.concatenate(groundtruths['source_id'], axis=0)
heights = np.concatenate(groundtruths['height'], axis=0)
widths = np.concatenate(groundtruths['width'], axis=0)
gt_images = [{'id': int(i), 'height': int(h), 'width': int(w)} for i, h, w
in zip(source_ids, heights, widths)]
gt_annotations = []
num_batches = len(groundtruths['source_id'])
for i in range(num_batches):
logging.info(
'convert_groundtruths_to_coco_dataset: Processing annotation %d', i)
max_num_instances = groundtruths['classes'][i].shape[1]
batch_size = groundtruths['source_id'][i].shape[0]
for j in range(batch_size):
num_instances = groundtruths['num_detections'][i][j]
if num_instances > max_num_instances:
logging.warning(
'num_groundtruths is larger than max_num_instances, %d vs. %d',
num_instances, max_num_instances)
num_instances = max_num_instances
for k in range(int(num_instances)):
ann = {}
ann['image_id'] = int(groundtruths['source_id'][i][j])
if 'is_crowds' in groundtruths:
ann['iscrowd'] = int(groundtruths['is_crowds'][i][j, k])
else:
ann['iscrowd'] = 0
ann['category_id'] = int(groundtruths['classes'][i][j, k])
boxes = groundtruths['boxes'][i]
ann['bbox'] = [
float(boxes[j, k, 1]),
float(boxes[j, k, 0]),
float(boxes[j, k, 3] - boxes[j, k, 1]),
float(boxes[j, k, 2] - boxes[j, k, 0])]
if 'areas' in groundtruths:
ann['area'] = float(groundtruths['areas'][i][j, k])
else:
ann['area'] = float(
(boxes[j, k, 3] - boxes[j, k, 1]) *
(boxes[j, k, 2] - boxes[j, k, 0]))
if 'masks' in groundtruths:
if isinstance(groundtruths['masks'][i][j, k], tf.Tensor):
mask = Image.open(
six.BytesIO(groundtruths['masks'][i][j, k].numpy()))
width, height = mask.size
np_mask = (
np.array(mask.getdata()).reshape(height,
width).astype(np.uint8))
else:
mask = Image.open(
six.BytesIO(groundtruths['masks'][i][j, k]))
width, height = mask.size
np_mask = (
np.array(mask.getdata()).reshape(height,
width).astype(np.uint8))
np_mask[np_mask > 0] = 255
encoded_mask = mask_api.encode(np.asfortranarray(np_mask))
ann['segmentation'] = encoded_mask
# Ensure the content of `counts` is a JSON-serializable string.
if 'counts' in ann['segmentation']:
ann['segmentation']['counts'] = six.ensure_str(
ann['segmentation']['counts'])
if 'areas' not in groundtruths:
ann['area'] = mask_api.area(encoded_mask)
gt_annotations.append(ann)
for i, ann in enumerate(gt_annotations):
ann['id'] = i + 1
if label_map:
gt_categories = [{'id': i, 'name': label_map[i]} for i in label_map]
else:
category_ids = [gt['category_id'] for gt in gt_annotations]
gt_categories = [{'id': i} for i in set(category_ids)]
gt_dataset = {
'images': gt_images,
'categories': gt_categories,
'annotations': copy.deepcopy(gt_annotations),
}
return gt_dataset
class COCOGroundtruthGenerator:
"""Generates the groundtruth annotations from a single example."""
def __init__(self, file_pattern, file_type, num_examples, include_mask,
regenerate_source_id=False):
self._file_pattern = file_pattern
self._num_examples = num_examples
self._include_mask = include_mask
self._dataset_fn = dataset_fn.pick_dataset_fn(file_type)
self._regenerate_source_id = regenerate_source_id
def _parse_single_example(self, example):
"""Parses a single serialized tf.Example proto.
Args:
example: a serialized tf.Example proto string.
Returns:
A dictionary of groundtruth with the following fields:
source_id: a scalar tensor of int64 representing the image source_id.
height: a scalar tensor of int64 representing the image height.
width: a scalar tensor of int64 representing the image width.
boxes: a float tensor of shape [K, 4], representing the groundtruth
boxes in absolute coordinates with respect to the original image size.
classes: an int64 tensor of shape [K], representing the class labels of
each instance.
is_crowds: a bool tensor of shape [K], indicating whether the instance
is crowd.
areas: a float tensor of shape [K], indicating the area of each
instance.
masks: a string tensor of shape [K], containing the bytes of the png
mask of each instance.
"""
decoder = tf_example_decoder.TfExampleDecoder(
include_mask=self._include_mask,
regenerate_source_id=self._regenerate_source_id)
decoded_tensors = decoder.decode(example)
image = decoded_tensors['image']
image_size = tf.shape(image)[0:2]
boxes = box_ops.denormalize_boxes(
decoded_tensors['groundtruth_boxes'], image_size)
source_id = decoded_tensors['source_id']
if source_id.dtype is tf.string:
source_id = tf.strings.to_number(source_id, out_type=tf.int64)
groundtruths = {
'source_id': source_id,
'height': decoded_tensors['height'],
'width': decoded_tensors['width'],
'num_detections': tf.shape(decoded_tensors['groundtruth_classes'])[0],
'boxes': boxes,
'classes': decoded_tensors['groundtruth_classes'],
'is_crowds': decoded_tensors['groundtruth_is_crowd'],
'areas': decoded_tensors['groundtruth_area'],
}
if self._include_mask:
groundtruths.update({
'masks': decoded_tensors['groundtruth_instance_masks_png'],
})
return groundtruths
def _build_pipeline(self):
"""Builds data pipeline to generate groundtruth annotations."""
dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
dataset = dataset.interleave(
map_func=lambda filename: self._dataset_fn(filename).prefetch(1),
cycle_length=None,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.take(self._num_examples)
dataset = dataset.map(self._parse_single_example,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
dataset = dataset.batch(1, drop_remainder=False)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
return dataset
def __call__(self):
return self._build_pipeline()
def scan_and_generator_annotation_file(file_pattern: str,
file_type: str,
num_samples: int,
include_mask: bool,
annotation_file: str,
regenerate_source_id: bool = False):
"""Scans and generates the COCO-style annotation JSON file given a dataset."""
groundtruth_generator = COCOGroundtruthGenerator(
file_pattern, file_type, num_samples, include_mask, regenerate_source_id)
generate_annotation_file(groundtruth_generator, annotation_file)
def generate_annotation_file(groundtruth_generator,
annotation_file):
"""Generates COCO-style annotation JSON file given a groundtruth generator."""
groundtruths = {}
logging.info('Loading groundtruth annotations from dataset to memory...')
for i, groundtruth in enumerate(groundtruth_generator()):
logging.info('generate_annotation_file: Processing annotation %d', i)
for k, v in six.iteritems(groundtruth):
if k not in groundtruths:
groundtruths[k] = [v]
else:
groundtruths[k].append(v)
gt_dataset = convert_groundtruths_to_coco_dataset(groundtruths)
logging.info('Saving groundtruth annotations to the JSON file...')
with tf.io.gfile.GFile(annotation_file, 'w') as f:
f.write(json.dumps(gt_dataset))
logging.info('Done saving the JSON file...')
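

A small illustrative sketch (not part of the module) of the input layout `convert_groundtruths_to_coco_dataset` expects: each field is a list with one numpy array per batch. Here a single batch holds a single image with one box; all numbers are placeholders:

example_groundtruths = {
    'source_id': [np.array([1])],
    'height': [np.array([100])],
    'width': [np.array([100])],
    'num_detections': [np.array([1])],
    'boxes': [np.array([[[10., 20., 50., 60.]]])],  # [y1, x1, y2, x2]
    'classes': [np.array([[3]])],
}
gt_dataset = convert_groundtruths_to_coco_dataset(example_groundtruths)
# gt_dataset['annotations'][0]['bbox'] is [20.0, 10.0, 40.0, 40.0] (COCO xywh).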
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for coco_utils."""
import os
import tensorflow as tf
from official.vision.dataloaders import tfexample_utils
from official.vision.evaluation import coco_utils
class CocoUtilsTest(tf.test.TestCase):
def test_scan_and_generator_annotation_file(self):
num_samples = 10
example = tfexample_utils.create_detection_test_example(
image_height=512, image_width=512, image_channel=3, num_instances=10)
tf_examples = [example] * num_samples
data_file = os.path.join(self.create_tempdir(), 'test.tfrecord')
tfexample_utils.dump_to_tfrecord(
record_file=data_file, tf_examples=tf_examples)
annotation_file = os.path.join(self.create_tempdir(), 'annotation.json')
coco_utils.scan_and_generator_annotation_file(
file_pattern=data_file,
file_type='tfrecord',
num_samples=num_samples,
include_mask=True,
annotation_file=annotation_file)
self.assertTrue(
tf.io.gfile.exists(annotation_file),
msg=f'Annotation file {annotation_file} does not exist.')
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""IOU Metrics used for semantic segmentation models."""
import numpy as np
import tensorflow as tf
class PerClassIoU(tf.keras.metrics.Metric):
"""Computes the per-class Intersection-Over-Union metric.
Mean Intersection-Over-Union is a common evaluation metric for semantic image
segmentation, which first computes the IOU for each semantic class.
IOU is defined as follows:
IOU = true_positive / (true_positive + false_positive + false_negative).
The predictions are accumulated in a confusion matrix, weighted by
`sample_weight` and the metric is then calculated from it.
If `sample_weight` is `None`, weights default to 1.
Use `sample_weight` of 0 to mask values.
Example:
>>> # cm = [[1, 1],
>>> # [1, 1]]
>>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
>>> # iou = true_positives / (sum_row + sum_col - true_positives))
>>> # result = [1 / (2 + 2 - 1), 1 / (2 + 2 - 1)] = [0.33, 0.33]
>>> m = PerClassIoU(num_classes=2)
>>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1])
>>> m.result().numpy()
[0.33333334, 0.33333334]
"""
def __init__(self, num_classes, name=None, dtype=None):
"""Initializes `PerClassIoU`.
Args:
num_classes: The possible number of labels the prediction task can have.
This value must be provided, since a confusion matrix of dimension =
[num_classes, num_classes] will be allocated.
name: (Optional) string name of the metric instance.
dtype: (Optional) data type of the metric result.
"""
super(PerClassIoU, self).__init__(name=name, dtype=dtype)
self.num_classes = num_classes
# Variable to accumulate the predictions in the confusion matrix.
self.total_cm = self.add_weight(
'total_confusion_matrix',
shape=(num_classes, num_classes),
initializer=tf.compat.v1.zeros_initializer)
def update_state(self, y_true, y_pred, sample_weight=None):
"""Accumulates the confusion matrix statistics.
Args:
y_true: The ground truth values.
y_pred: The predicted values.
sample_weight: Optional weighting of each example. Defaults to 1. Can be a
`Tensor` whose rank is either 0, or the same rank as `y_true`, and must
be broadcastable to `y_true`.
Returns:
The update op that accumulates the confusion matrix statistics.
"""
y_true = tf.cast(y_true, self._dtype)
y_pred = tf.cast(y_pred, self._dtype)
# Flatten the input if its rank > 1.
if y_pred.shape.ndims > 1:
y_pred = tf.reshape(y_pred, [-1])
if y_true.shape.ndims > 1:
y_true = tf.reshape(y_true, [-1])
if sample_weight is not None:
sample_weight = tf.cast(sample_weight, self._dtype)
if sample_weight.shape.ndims > 1:
sample_weight = tf.reshape(sample_weight, [-1])
# Accumulate the prediction to current confusion matrix.
current_cm = tf.math.confusion_matrix(
y_true,
y_pred,
self.num_classes,
weights=sample_weight,
dtype=self._dtype)
return self.total_cm.assign_add(current_cm)
def result(self):
"""Computes the per-class intersection-over-union via the confusion matrix."""
sum_over_row = tf.cast(
tf.reduce_sum(self.total_cm, axis=0), dtype=self._dtype)
sum_over_col = tf.cast(
tf.reduce_sum(self.total_cm, axis=1), dtype=self._dtype)
true_positives = tf.cast(
tf.linalg.tensor_diag_part(self.total_cm), dtype=self._dtype)
# sum_over_row + sum_over_col =
# 2 * true_positives + false_positives + false_negatives.
denominator = sum_over_row + sum_over_col - true_positives
return tf.math.divide_no_nan(true_positives, denominator)
def reset_states(self):
tf.keras.backend.set_value(
self.total_cm, np.zeros((self.num_classes, self.num_classes)))
def get_config(self):
config = {'num_classes': self.num_classes}
base_config = super(PerClassIoU, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
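

A quick usage sketch (illustrative, not part of the module; assumes TF2 eager execution), mirroring the class docstring example:

m = PerClassIoU(num_classes=2)
m.update_state([0, 0, 1, 1], [0, 1, 0, 1])
print(m.result().numpy())  # [0.33333334, 0.33333334], one IoU per class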
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for iou metric."""
import tensorflow as tf
from official.vision.evaluation import iou
class MeanIoUTest(tf.test.TestCase):
def test_config(self):
m_obj = iou.PerClassIoU(num_classes=2, name='per_class_iou')
self.assertEqual(m_obj.name, 'per_class_iou')
self.assertEqual(m_obj.num_classes, 2)
m_obj2 = iou.PerClassIoU.from_config(m_obj.get_config())
self.assertEqual(m_obj2.name, 'per_class_iou')
self.assertEqual(m_obj2.num_classes, 2)
def test_unweighted(self):
y_pred = [0, 1, 0, 1]
y_true = [0, 0, 1, 1]
m_obj = iou.PerClassIoU(num_classes=2)
result = m_obj(y_true, y_pred)
# cm = [[1, 1],
# [1, 1]]
# sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [1 / (2 + 2 - 1), 1 / (2 + 2 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3)
def test_weighted(self):
y_pred = tf.constant([0, 1, 0, 1], dtype=tf.float32)
y_true = tf.constant([0, 0, 1, 1])
sample_weight = tf.constant([0.2, 0.3, 0.4, 0.1])
m_obj = iou.PerClassIoU(num_classes=2)
result = m_obj(y_true, y_pred, sample_weight=sample_weight)
# cm = [[0.2, 0.3],
# [0.4, 0.1]]
# sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [0.2 / (0.6 + 0.5 - 0.2), 0.1 / (0.4 + 0.5 - 0.1)]
self.assertAllClose(expected_result, result, atol=1e-3)
def test_multi_dim_input(self):
y_pred = tf.constant([[0, 1], [0, 1]], dtype=tf.float32)
y_true = tf.constant([[0, 0], [1, 1]])
sample_weight = tf.constant([[0.2, 0.3], [0.4, 0.1]])
m_obj = iou.PerClassIoU(num_classes=2)
result = m_obj(y_true, y_pred, sample_weight=sample_weight)
# cm = [[0.2, 0.3],
# [0.4, 0.1]]
# sum_row = [0.6, 0.4], sum_col = [0.5, 0.5], true_positives = [0.2, 0.1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [0.2 / (0.6 + 0.5 - 0.2), 0.1 / (0.4 + 0.5 - 0.1)]
self.assertAllClose(expected_result, result, atol=1e-3)
def test_zero_valid_entries(self):
m_obj = iou.PerClassIoU(num_classes=2)
self.assertAllClose(m_obj.result(), [0, 0], atol=1e-3)
def test_zero_and_non_zero_entries(self):
y_pred = tf.constant([1], dtype=tf.float32)
y_true = tf.constant([1])
m_obj = iou.PerClassIoU(num_classes=2)
result = m_obj(y_true, y_pred)
# cm = [[0, 0],
# [0, 1]]
# sum_row = [0, 1], sum_col = [0, 1], true_positives = [0, 1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [0, 1 / (1 + 1 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3)
def test_update_state_and_result(self):
y_pred = [0, 1, 0, 1]
y_true = [0, 0, 1, 1]
m_obj = iou.PerClassIoU(num_classes=2)
m_obj.update_state(y_true, y_pred)
result = m_obj.result()
# cm = [[1, 1],
# [1, 1]]
# sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
# iou = true_positives / (sum_row + sum_col - true_positives))
expected_result = [1 / (2 + 2 - 1), 1 / (2 + 2 - 1)]
self.assertAllClose(expected_result, result, atol=1e-3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of the Panoptic Quality metric.
Panoptic Quality is an instance-based metric for evaluating the task of
image parsing, aka panoptic segmentation.
Please see the paper for details:
"Panoptic Segmentation", Alexander Kirillov, Kaiming He, Ross Girshick,
Carsten Rother and Piotr Dollar. arXiv:1801.00868, 2018.
Note that this metric class is branched from
https://github.com/tensorflow/models/blob/master/research/deeplab/evaluation/panoptic_quality.py
"""
import collections
import numpy as np
_EPSILON = 1e-10
def realdiv_maybe_zero(x, y):
"""Element-wise x / y, returning 0 wherever y is (near) zero."""
return np.where(
np.less(np.abs(y), _EPSILON), np.zeros_like(x), np.divide(x, y))
def _ids_to_counts(id_array):
"""Given a numpy array, returns a mapping from each unique entry to its count."""
ids, counts = np.unique(id_array, return_counts=True)
return dict(zip(ids, counts))
class PanopticQuality:
"""Metric class for Panoptic Quality.
"Panoptic Segmentation" by Alexander Kirillov, Kaiming He, Ross Girshick,
Carsten Rother, Piotr Dollar.
https://arxiv.org/abs/1801.00868
"""
def __init__(self, num_categories, ignored_label, max_instances_per_category,
offset):
"""Initializes PanopticQuality.
Args:
num_categories: The number of segmentation categories (or "classes") in
the dataset.
ignored_label: A category id that is ignored in evaluation, e.g. the void
label as defined in COCO panoptic segmentation dataset.
max_instances_per_category: The maximum number of instances for each
category. Used in ensuring unique instance labels.
offset: The maximum number of unique labels. This is used, by multiplying
the ground-truth labels, to generate unique ids for individual regions
of overlap between groundtruth and predicted segments.
"""
self.num_categories = num_categories
self.ignored_label = ignored_label
self.max_instances_per_category = max_instances_per_category
self.offset = offset
self.reset()
def _naively_combine_labels(self, category_mask, instance_mask):
"""Naively creates a combined label array from categories and instances."""
return (category_mask.astype(np.uint32) * self.max_instances_per_category +
instance_mask.astype(np.uint32))
def compare_and_accumulate(self, groundtruths, predictions):
"""Compares predicted segmentation with groundtruth, accumulates its metric.
It is not assumed that instance ids are unique across different categories.
See for example combine_semantic_and_instance_predictions.py in official
PanopticAPI evaluation code for issues to consider when fusing category
and instance labels.
Instance ids of the ignored category have the meaning that id 0 is "void"
and the remaining ones are crowd instances.
Args:
groundtruths: A dictionary containing groundtruth labels. It should contain
the following fields.
- category_mask: A 2D numpy uint16 array of groundtruth per-pixel
category labels.
- instance_mask: A 2D numpy uint16 array of groundtruth instance labels.
predictions: A dictionary containing the model outputs. It should contain
the following fields.
- category_mask: A 2D numpy uint16 array of predicted per-pixel
category labels.
- instance_mask: A 2D numpy uint16 array of predicted instance labels.
"""
groundtruth_category_mask = groundtruths['category_mask']
groundtruth_instance_mask = groundtruths['instance_mask']
predicted_category_mask = predictions['category_mask']
predicted_instance_mask = predictions['instance_mask']
# First, combine the category and instance labels so that every unique
# value for (category, instance) is assigned a unique integer label.
pred_segment_id = self._naively_combine_labels(predicted_category_mask,
predicted_instance_mask)
gt_segment_id = self._naively_combine_labels(groundtruth_category_mask,
groundtruth_instance_mask)
# Pre-calculate areas for all groundtruth and predicted segments.
gt_segment_areas = _ids_to_counts(gt_segment_id)
pred_segment_areas = _ids_to_counts(pred_segment_id)
# We assume there is only one void segment and it has instance id = 0.
void_segment_id = self.ignored_label * self.max_instances_per_category
# There may be other ignored groundtruth segments with instance id > 0;
# find those ids using the unique segment ids extracted with the area
# computation above.
ignored_segment_ids = {
gt_segment_id for gt_segment_id in gt_segment_areas
if (gt_segment_id //
self.max_instances_per_category) == self.ignored_label
}
# Next, combine the groundtruth and predicted labels. Dividing up the pixels
# based on which groundtruth segment and which predicted segment they belong
# to, this will assign a different 32-bit integer label to each choice
# of (groundtruth segment, predicted segment), encoded as
# gt_segment_id * offset + pred_segment_id.
intersection_id_array = (
gt_segment_id.astype(np.uint64) * self.offset +
pred_segment_id.astype(np.uint64))
# For every combination of (groundtruth segment, predicted segment) with a
# non-empty intersection, this counts the number of pixels in that
# intersection.
intersection_areas = _ids_to_counts(intersection_id_array)
# Helper function that computes the area of the overlap between a predicted
# segment and the ground-truth void/ignored segment.
def prediction_void_overlap(pred_segment_id):
void_intersection_id = void_segment_id * self.offset + pred_segment_id
return intersection_areas.get(void_intersection_id, 0)
# Compute overall ignored overlap.
def prediction_ignored_overlap(pred_segment_id):
total_ignored_overlap = 0
for ignored_segment_id in ignored_segment_ids:
intersection_id = ignored_segment_id * self.offset + pred_segment_id
total_ignored_overlap += intersection_areas.get(intersection_id, 0)
return total_ignored_overlap
# Sets populated with the ids of groundtruth/predicted segments that have
# been matched to an overlapping predicted/groundtruth segment,
# respectively.
gt_matched = set()
pred_matched = set()
# Calculate IoU per pair of intersecting segments of the same category.
for intersection_id, intersection_area in intersection_areas.items():
gt_segment_id = int(intersection_id // self.offset)
pred_segment_id = int(intersection_id % self.offset)
gt_category = int(gt_segment_id // self.max_instances_per_category)
pred_category = int(pred_segment_id // self.max_instances_per_category)
if gt_category != pred_category:
continue
# Union between the groundtruth and predicted segments being compared does
# not include the portion of the predicted segment that consists of
# groundtruth "void" pixels.
union = (
gt_segment_areas[gt_segment_id] +
pred_segment_areas[pred_segment_id] - intersection_area -
prediction_void_overlap(pred_segment_id))
iou = intersection_area / union
if iou > 0.5:
self.tp_per_class[gt_category] += 1
self.iou_per_class[gt_category] += iou
gt_matched.add(gt_segment_id)
pred_matched.add(pred_segment_id)
# Count false negatives for each category.
for gt_segment_id in gt_segment_areas:
if gt_segment_id in gt_matched:
continue
category = gt_segment_id // self.max_instances_per_category
# Failing to detect a void segment is not a false negative.
if category == self.ignored_label:
continue
self.fn_per_class[category] += 1
# Count false positives for each category.
for pred_segment_id in pred_segment_areas:
if pred_segment_id in pred_matched:
continue
# A false positive is not penalized if it is mostly ignored in the
# groundtruth.
if (prediction_ignored_overlap(pred_segment_id) /
pred_segment_areas[pred_segment_id]) > 0.5:
continue
category = pred_segment_id // self.max_instances_per_category
self.fp_per_class[category] += 1
def _valid_categories(self):
"""Categories with a "valid" value for the metric, i.e. with > 0 instances.
We will ignore the `ignored_label` class and other classes which have
`tp + fn + fp = 0`.
Returns:
Boolean array of shape `[num_categories]`.
"""
valid_categories = np.not_equal(
self.tp_per_class + self.fn_per_class + self.fp_per_class, 0)
if self.ignored_label >= 0 and self.ignored_label < self.num_categories:
valid_categories[self.ignored_label] = False
return valid_categories
def result_per_category(self):
"""For supported metrics, returns individual per-category metric values.
Returns:
A dictionary containing all per-class metrics; each metric is a numpy array
of shape `[self.num_categories]`, where index `i` is the metric value
over only that category.
"""
sq_per_class = realdiv_maybe_zero(self.iou_per_class, self.tp_per_class)
rq_per_class = realdiv_maybe_zero(
self.tp_per_class,
self.tp_per_class + 0.5 * self.fn_per_class + 0.5 * self.fp_per_class)
return {
'sq_per_class': sq_per_class,
'rq_per_class': rq_per_class,
'pq_per_class': np.multiply(sq_per_class, rq_per_class)
}
def result(self, is_thing=None):
"""Computes and returns the detailed metric results over all comparisons.
Args:
is_thing: A boolean array of length `num_categories`. The entry
`is_thing[category_id]` is True iff that category is a "thing" category
instead of "stuff."
Returns:
A dictionary with a breakdown of metrics and/or metric factors by things,
stuff, and all categories.
"""
results = self.result_per_category()
valid_categories = self._valid_categories()
# If known, break down which categories are valid _and_ things/stuff.
category_sets = collections.OrderedDict()
category_sets['All'] = valid_categories
if is_thing is not None:
category_sets['Things'] = np.logical_and(valid_categories, is_thing)
category_sets['Stuff'] = np.logical_and(valid_categories,
np.logical_not(is_thing))
for category_set_name, in_category_set in category_sets.items():
if np.any(in_category_set):
results.update({
f'{category_set_name}_pq':
np.mean(results['pq_per_class'][in_category_set]),
f'{category_set_name}_sq':
np.mean(results['sq_per_class'][in_category_set]),
f'{category_set_name}_rq':
np.mean(results['rq_per_class'][in_category_set]),
# The number of categories in this subset.
f'{category_set_name}_num_categories':
np.sum(in_category_set.astype(np.int32)),
})
else:
results[category_set_name] = {
f'{category_set_name}_pq': 0.,
f'{category_set_name}_sq': 0.,
f'{category_set_name}_rq': 0.,
f'{category_set_name}_num_categories': 0
}
return results
def reset(self):
"""Resets the accumulation to the metric class's state at initialization."""
self.iou_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.tp_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.fn_per_class = np.zeros(self.num_categories, dtype=np.float64)
self.fp_per_class = np.zeros(self.num_categories, dtype=np.float64)
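

A tiny self-check sketch (illustrative, not part of the module; mask values and constructor arguments are placeholders): a perfect match on a 2x2 image with a single segment of category 0 yields PQ = SQ = RQ = 1 for that category.

pq = PanopticQuality(num_categories=2, ignored_label=1,
                     max_instances_per_category=4, offset=256)
masks = {
    'category_mask': np.zeros([2, 2], dtype=np.uint16),
    'instance_mask': np.ones([2, 2], dtype=np.uint16),
}
pq.compare_and_accumulate(masks, masks)
print(pq.result()['All_pq'])  # 1.0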
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The panoptic quality evaluator.

The following snippet demonstrates the use of interfaces:

  evaluator = PanopticQualityEvaluator(...)
  for _ in range(num_evals):
    for _ in range(num_batches_per_eval):
      groundtruths, predictions = predictor.predict(...)  # pop a batch.
      evaluator.update_state(groundtruths, predictions)
    evaluator.result()  # finish one full eval and reset states.

See also: https://github.com/cocodataset/cocoapi/
"""
import numpy as np
import tensorflow as tf
from official.vision.evaluation import panoptic_quality
def _crop_padding(mask, image_info):
"""Crops padded masks to match original image shape.
Args:
mask: a padded mask tensor.
image_info: a tensor that holds information about original and preprocessed
images.
Returns:
The cropped mask, with a leading batch dimension: tf.Tensor.
"""
image_shape = tf.cast(image_info[0, :], tf.int32)
mask = tf.image.crop_to_bounding_box(
tf.expand_dims(mask, axis=-1), 0, 0,
image_shape[0], image_shape[1])
return tf.expand_dims(mask[:, :, 0], axis=0)
class PanopticQualityEvaluator:
"""Panoptic Quality metric class."""
def __init__(self, num_categories, ignored_label, max_instances_per_category,
offset, is_thing=None, rescale_predictions=False):
"""Constructs Panoptic Quality evaluation class.
The class provides the interface to Panoptic Quality metrics_fn.
Args:
num_categories: The number of segmentation categories (or "classes") in
the dataset.
ignored_label: A category id that is ignored in evaluation, e.g. the void
label as defined in COCO panoptic segmentation dataset.
max_instances_per_category: The maximum number of instances for each
category. Used in ensuring unique instance labels.
offset: The maximum number of unique labels. This is used, by multiplying
the ground-truth labels, to generate unique ids for individual regions
of overlap between groundtruth and predicted segments.
is_thing: A boolean array of length `num_categories`. The entry
`is_thing[category_id]` is True iff that category is a "thing" category
instead of "stuff." Defaults to `None`, which means categories are not
split into these two groups.
rescale_predictions: `bool`, whether to scale predictions back to the
original image sizes. If True, groundtruths['image_info'] is used to rescale
predictions.
"""
self._pq_metric_module = panoptic_quality.PanopticQuality(
num_categories, ignored_label, max_instances_per_category, offset)
self._is_thing = is_thing
self._rescale_predictions = rescale_predictions
self._required_prediction_fields = ['category_mask', 'instance_mask']
self._required_groundtruth_fields = ['category_mask', 'instance_mask']
self.reset_states()
@property
def name(self):
return 'panoptic_quality'
def reset_states(self):
"""Resets internal states for a fresh run."""
self._pq_metric_module.reset()
def result(self):
"""Evaluates detection results and resets states."""
results = self._pq_metric_module.result(self._is_thing)
self.reset_states()
return results
def _convert_to_numpy(self, groundtruths, predictions):
"""Converts tensors to numpy arrays."""
if groundtruths:
labels = tf.nest.map_structure(lambda x: x.numpy(), groundtruths)
numpy_groundtruths = {}
for key, val in labels.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_groundtruths[key] = val
else:
numpy_groundtruths = groundtruths
if predictions:
outputs = tf.nest.map_structure(lambda x: x.numpy(), predictions)
numpy_predictions = {}
for key, val in outputs.items():
if isinstance(val, tuple):
val = np.concatenate(val)
numpy_predictions[key] = val
else:
numpy_predictions = predictions
return numpy_groundtruths, numpy_predictions
def update_state(self, groundtruths, predictions):
"""Update and aggregate detection results and groundtruth data.
Args:
groundtruths: a dictionary of Tensors including the fields below. See also
different parsers under `../dataloader` for more details.
Required fields:
- category_mask: a numpy array of uint16 of shape [batch_size, H, W].
- instance_mask: a numpy array of uint16 of shape [batch_size, H, W].
- image_info: [batch, 4, 2], a tensor that holds information about
original and preprocessed images. Each entry is in the format of
[[original_height, original_width], [input_height, input_width],
[y_scale, x_scale], [y_offset, x_offset]], where [input_height,
input_width] is the actual scaled image size, and [y_scale, x_scale]
is the scaling factor, which is the ratio of scaled dimension /
original dimension.
predictions: a dictionary of tensors including the fields below. See
different parsers under `../dataloader` for more details.
Required fields:
- category_mask: a numpy array of uint16 of shape [batch_size, H, W].
- instance_mask: a numpy array of uint16 of shape [batch_size, H, W].
Raises:
ValueError: if the required prediction or groundtruth fields are not
present in the incoming `predictions` or `groundtruths`.
"""
groundtruths, predictions = self._convert_to_numpy(groundtruths,
predictions)
for k in self._required_prediction_fields:
if k not in predictions:
raise ValueError(
'Missing the required key `{}` in predictions!'.format(k))
for k in self._required_groundtruth_fields:
if k not in groundtruths:
raise ValueError(
'Missing the required key `{}` in groundtruths!'.format(k))
if self._rescale_predictions:
for idx in range(len(groundtruths['category_mask'])):
image_info = groundtruths['image_info'][idx]
groundtruths_ = {
'category_mask':
_crop_padding(groundtruths['category_mask'][idx], image_info),
'instance_mask':
_crop_padding(groundtruths['instance_mask'][idx], image_info),
}
predictions_ = {
'category_mask':
_crop_padding(predictions['category_mask'][idx], image_info),
'instance_mask':
_crop_padding(predictions['instance_mask'][idx], image_info),
}
groundtruths_, predictions_ = self._convert_to_numpy(
groundtruths_, predictions_)
self._pq_metric_module.compare_and_accumulate(
groundtruths_, predictions_)
else:
self._pq_metric_module.compare_and_accumulate(groundtruths, predictions)
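

A minimal usage sketch (illustrative, not part of the module; mask contents are placeholders, and `rescale_predictions` is left off so `image_info` is not required). It assumes TF2 eager execution and this module's imports:

evaluator = PanopticQualityEvaluator(
    num_categories=2, ignored_label=1, max_instances_per_category=4,
    offset=256, rescale_predictions=False)
masks = {
    'category_mask': tf.zeros([1, 2, 2], dtype=tf.uint16),
    'instance_mask': tf.ones([1, 2, 2], dtype=tf.uint16),
}
evaluator.update_state(masks, masks)
results = evaluator.result()  # e.g. results['All_pq'] is 1.0 for this match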
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for panoptic_quality_evaluator."""
import numpy as np
import tensorflow as tf
from official.vision.evaluation import panoptic_quality_evaluator
class PanopticQualityEvaluatorTest(tf.test.TestCase):
def test_multiple_batches(self):
category_mask = np.zeros([6, 6], np.uint16)
groundtruth_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
good_det_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask':
tf.convert_to_tensor([category_mask]),
'instance_mask':
tf.convert_to_tensor([groundtruth_instance_mask]),
'image_info':
tf.convert_to_tensor([[[6, 6], [6, 6], [1.0, 1.0], [0, 0]]],
dtype=tf.float32)
}
predictions = {
'category_mask': tf.convert_to_tensor([category_mask]),
'instance_mask': tf.convert_to_tensor([good_det_instance_mask])
}
pq_evaluator = panoptic_quality_evaluator.PanopticQualityEvaluator(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16,
rescale_predictions=True)
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
bad_det_instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
predictions['instance_mask'] = tf.convert_to_tensor([bad_det_instance_mask])
for _ in range(2):
pq_evaluator.update_state(groundtruths, predictions)
results = pq_evaluator.result()
np.testing.assert_array_equal(results['pq_per_class'],
[((28 / 30 + 6 / 8) + (27 / 32)) / 2 / 2])
np.testing.assert_array_equal(results['rq_per_class'], [3 / 4])
np.testing.assert_array_equal(results['sq_per_class'],
[((28 / 30 + 6 / 8) + (27 / 32)) / 3])
self.assertAlmostEqual(results['All_pq'], 0.63177083)
self.assertAlmostEqual(results['All_rq'], 0.75)
self.assertAlmostEqual(results['All_sq'], 0.84236111)
self.assertEqual(results['All_num_categories'], 1)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for Panoptic Quality metric.
Note that this metric test class is branched from
https://github.com/tensorflow/models/blob/master/research/deeplab/evaluation/panoptic_quality_test.py
"""
from absl.testing import absltest
import numpy as np
from official.vision.evaluation import panoptic_quality
class PanopticQualityTest(absltest.TestCase):
def test_perfect_match(self):
category_mask = np.zeros([6, 6], np.uint16)
instance_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 1, 1, 1],
[1, 2, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [2.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [1.0])
np.testing.assert_array_equal(results['rq_per_class'], [1.0])
np.testing.assert_array_equal(results['sq_per_class'], [1.0])
self.assertAlmostEqual(results['All_pq'], 1.0)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], 1.0)
self.assertEqual(results['All_num_categories'], 1)
def test_totally_wrong(self):
category_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 1, 0, 0, 1, 0],
[0, 1, 1, 1, 1, 0],
[0, 1, 1, 1, 1, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
instance_mask = np.zeros([6, 6], np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': instance_mask
}
predictions = {
'category_mask': 1 - category_mask,
'instance_mask': instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=2,
ignored_label=2,
max_instances_per_category=1,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 0.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 0])
np.testing.assert_array_equal(pq_metric.fn_per_class, [1, 1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [1, 1])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 0.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 0.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 0.0])
self.assertAlmostEqual(results['All_pq'], 0.0)
self.assertAlmostEqual(results['All_rq'], 0.0)
self.assertAlmostEqual(results['All_sq'], 0.0)
self.assertEqual(results['All_num_categories'], 2)
def test_matches_by_iou(self):
groundtruth_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 2, 2, 2, 1],
[1, 2, 2, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
good_det_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 2, 2, 1],
[1, 2, 2, 2, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruths = {
'category_mask': np.zeros_like(groundtruth_instance_mask),
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': np.zeros_like(good_det_instance_mask),
'instance_mask': good_det_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=1,
ignored_label=2,
max_instances_per_category=16,
offset=16)
pq_metric.compare_and_accumulate(groundtruths, predictions)
# iou(1, 1) = 28/30
# iou(2, 2) = 6 / 8
np.testing.assert_array_almost_equal(pq_metric.iou_per_class,
[28 / 30 + 6 / 8])
np.testing.assert_array_equal(pq_metric.tp_per_class, [2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'],
[(28 / 30 + 6 / 8) / 2])
np.testing.assert_array_equal(results['rq_per_class'], [1.0])
np.testing.assert_array_equal(results['sq_per_class'],
[(28 / 30 + 6 / 8) / 2])
self.assertAlmostEqual(results['All_pq'], (28 / 30 + 6 / 8) / 2)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], (28 / 30 + 6 / 8) / 2)
self.assertEqual(results['All_num_categories'], 1)
bad_det_instance_mask = np.array(
[
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 2, 2, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
predictions['instance_mask'] = bad_det_instance_mask
pq_metric.reset()
pq_metric.compare_and_accumulate(groundtruths, predictions)
# iou(1, 1) = 27/32
np.testing.assert_array_almost_equal(pq_metric.iou_per_class, [27 / 32])
np.testing.assert_array_equal(pq_metric.tp_per_class, [1])
np.testing.assert_array_equal(pq_metric.fn_per_class, [1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [1])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [27 / 32 / 2])
np.testing.assert_array_equal(results['rq_per_class'], [0.5])
np.testing.assert_array_equal(results['sq_per_class'], [27 / 32])
self.assertAlmostEqual(results['All_pq'], 27 / 32 / 2)
self.assertAlmostEqual(results['All_rq'], 0.5)
self.assertAlmostEqual(results['All_sq'], 27 / 32)
self.assertEqual(results['All_num_categories'], 1)
def test_wrong_instances(self):
category_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 1, 2, 2],
[1, 2, 2, 1, 2, 2],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruth_instance_mask = np.zeros([6, 6], dtype=np.uint16)
predicted_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': predicted_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=3,
ignored_label=0,
max_instances_per_category=10,
offset=100)
pq_metric.compare_and_accumulate(groundtruths, predictions)
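    # The class-2 groundtruth forms a single 8-pixel segment, while the
    # prediction splits it into two 4-pixel instances. Each candidate match
    # has IoU 4/8 = 0.5, which does not exceed the 0.5 matching threshold,
    # so class 2 receives 1 FN and 2 FPs.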
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 1.0, 0.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 1, 0])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0, 0, 1])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0, 0, 2])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 1.0, 0.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 1.0, 0.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 1.0, 0.0])
self.assertAlmostEqual(results['All_pq'], 0.5)
self.assertAlmostEqual(results['All_rq'], 0.5)
self.assertAlmostEqual(results['All_sq'], 0.5)
self.assertEqual(results['All_num_categories'], 2)
def test_instance_order_is_arbitrary(self):
category_mask = np.array([
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
[1, 2, 2, 1, 2, 2],
[1, 2, 2, 1, 2, 2],
[1, 1, 1, 1, 1, 1],
[1, 1, 1, 1, 1, 1],
],
dtype=np.uint16)
groundtruth_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 1, 1, 0, 0, 0],
[0, 1, 1, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
predicted_instance_mask = np.array([
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
],
dtype=np.uint16)
groundtruths = {
'category_mask': category_mask,
'instance_mask': groundtruth_instance_mask
}
predictions = {
'category_mask': category_mask,
'instance_mask': predicted_instance_mask
}
pq_metric = panoptic_quality.PanopticQuality(
num_categories=3,
ignored_label=0,
max_instances_per_category=10,
offset=100)
pq_metric.compare_and_accumulate(groundtruths, predictions)
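    # Matching is by segment overlap, not by instance id: each class-2 region
    # matches perfectly even though groundtruth and prediction use opposite
    # instance ids, giving 2 TPs with IoU 1.0 each.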
np.testing.assert_array_equal(pq_metric.iou_per_class, [0.0, 1.0, 2.0])
np.testing.assert_array_equal(pq_metric.tp_per_class, [0, 1, 2])
np.testing.assert_array_equal(pq_metric.fn_per_class, [0, 0, 0])
np.testing.assert_array_equal(pq_metric.fp_per_class, [0, 0, 0])
results = pq_metric.result()
np.testing.assert_array_equal(results['pq_per_class'], [0.0, 1.0, 1.0])
np.testing.assert_array_equal(results['rq_per_class'], [0.0, 1.0, 1.0])
np.testing.assert_array_equal(results['sq_per_class'], [0.0, 1.0, 1.0])
self.assertAlmostEqual(results['All_pq'], 1.0)
self.assertAlmostEqual(results['All_rq'], 1.0)
self.assertAlmostEqual(results['All_sq'], 1.0)
self.assertEqual(results['All_num_categories'], 2)
if __name__ == '__main__':
absltest.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Metrics for segmentation."""
import tensorflow as tf
from official.vision.evaluation import iou
class MeanIoU(tf.keras.metrics.MeanIoU):
"""Mean IoU metric for semantic segmentation.
  This class uses tf.keras.metrics.MeanIoU to compute a batched mean IoU when
  the predictions and groundtruth masks have been resized to the same size
  (rescale_predictions=False). It can also compute the mean IoU at the
  original groundtruth sizes (rescale_predictions=True), in which case each
  prediction is rescaled back to the original image size.
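  Example usage (a sketch; the tensor contents below are illustrative
  assumptions):

    metric = MeanIoU(num_classes=2, rescale_predictions=False)
    y_true = {
        'masks': ...,        # [batch, height, width, 1] groundtruth labels.
        'valid_masks': ...,  # [batch, height, width, 1] boolean validity.
        'image_info': ...,   # [batch, 4, 2] resize metadata.
    }
    metric.update_state(y_true, y_pred)  # y_pred: [batch, h, w, num_classes].
    miou = metric.result()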
"""
def __init__(
self, num_classes, rescale_predictions=False, name=None, dtype=None):
"""Constructs Segmentation evaluator class.
Args:
num_classes: `int`, number of classes.
rescale_predictions: `bool`, whether to scale back prediction to original
image sizes. If True, y_true['image_info'] is used to rescale
predictions.
      name: `str`, name of the metric instance.
dtype: data type of the metric result.
"""
self._rescale_predictions = rescale_predictions
super().__init__(num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred):
"""Updates metric state.
Args:
      y_true: `dict`, dictionary with the following keys:
        - masks: [batch, height, width, 1], groundtruth masks.
        - valid_masks: [batch, height, width, 1], valid elements in the mask.
        - image_info: [batch, 4, 2], a tensor that holds information about
          the original and preprocessed images. Each entry is in the format of
          [[original_height, original_width], [input_height, input_width],
          [y_scale, x_scale], [y_offset, x_offset]], where
          [input_height, input_width] is the actual scaled image size, and
          [y_scale, x_scale] is the scaling factor, which is the ratio of
          scaled dimension / original dimension.
      y_pred: Tensor [batch, height_p, width_p, num_classes], predicted masks.
"""
predictions = y_pred
masks = y_true['masks']
valid_masks = y_true['valid_masks']
images_info = y_true['image_info']
    if isinstance(predictions, (tuple, list)):
predictions = tf.concat(predictions, axis=0)
masks = tf.concat(masks, axis=0)
valid_masks = tf.concat(valid_masks, axis=0)
images_info = tf.concat(images_info, axis=0)
    # Ignored mask elements are set to zero for the argmax op.
masks = tf.where(valid_masks, masks, tf.zeros_like(masks))
if self._rescale_predictions:
# This part can only run on cpu/gpu due to dynamic image resizing.
for i in range(tf.shape(predictions)[0]):
mask = masks[i]
valid_mask = valid_masks[i]
predicted_mask = predictions[i]
image_info = images_info[i]
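        # image_info rows: [0] original size, [1] scaled input size,
        # [2] y/x scale factors, [3] y/x offsets. First undo the scaling,
        # then crop away the offsets and padding to recover the original
        # image extent.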
rescale_size = tf.cast(
tf.math.ceil(image_info[1, :] / image_info[2, :]), tf.int32)
image_shape = tf.cast(image_info[0, :], tf.int32)
offsets = tf.cast(image_info[3, :], tf.int32)
predicted_mask = tf.image.resize(
predicted_mask,
rescale_size,
method=tf.image.ResizeMethod.BILINEAR)
predicted_mask = tf.image.crop_to_bounding_box(predicted_mask,
offsets[0], offsets[1],
image_shape[0],
image_shape[1])
mask = tf.image.crop_to_bounding_box(mask, 0, 0, image_shape[0],
image_shape[1])
valid_mask = tf.image.crop_to_bounding_box(valid_mask, 0, 0,
image_shape[0],
image_shape[1])
predicted_mask = tf.argmax(predicted_mask, axis=2)
flatten_predictions = tf.reshape(predicted_mask, shape=[1, -1])
flatten_masks = tf.reshape(mask, shape=[1, -1])
flatten_valid_masks = tf.reshape(valid_mask, shape=[1, -1])
super(MeanIoU, self).update_state(
flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
else:
predictions = tf.image.resize(
predictions,
tf.shape(masks)[1:3],
method=tf.image.ResizeMethod.BILINEAR)
predictions = tf.argmax(predictions, axis=3)
flatten_predictions = tf.reshape(predictions, shape=[-1])
flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
class PerClassIoU(iou.PerClassIoU):
"""Per Class IoU metric for semantic segmentation.
  This class uses iou.PerClassIoU to compute a batched per-class IoU when the
  predictions and groundtruth masks have been resized to the same size
  (rescale_predictions=False). It can also compute the per-class IoU at the
  original groundtruth sizes (rescale_predictions=True), in which case each
  prediction is rescaled back to the original image size.
"""
def __init__(
self, num_classes, rescale_predictions=False, name=None, dtype=None):
"""Constructs Segmentation evaluator class.
Args:
num_classes: `int`, number of classes.
rescale_predictions: `bool`, whether to scale back prediction to original
image sizes. If True, y_true['image_info'] is used to rescale
predictions.
      name: `str`, name of the metric instance.
dtype: data type of the metric result.
"""
self._rescale_predictions = rescale_predictions
super().__init__(num_classes=num_classes, name=name, dtype=dtype)
def update_state(self, y_true, y_pred):
"""Updates metric state.
Args:
      y_true: `dict`, dictionary with the following keys:
        - masks: [batch, height, width, 1], groundtruth masks.
        - valid_masks: [batch, height, width, 1], valid elements in the mask.
        - image_info: [batch, 4, 2], a tensor that holds information about
          the original and preprocessed images. Each entry is in the format of
          [[original_height, original_width], [input_height, input_width],
          [y_scale, x_scale], [y_offset, x_offset]], where
          [input_height, input_width] is the actual scaled image size, and
          [y_scale, x_scale] is the scaling factor, which is the ratio of
          scaled dimension / original dimension.
      y_pred: Tensor [batch, height_p, width_p, num_classes], predicted masks.
"""
predictions = y_pred
masks = y_true['masks']
valid_masks = y_true['valid_masks']
images_info = y_true['image_info']
    if isinstance(predictions, (tuple, list)):
predictions = tf.concat(predictions, axis=0)
masks = tf.concat(masks, axis=0)
valid_masks = tf.concat(valid_masks, axis=0)
images_info = tf.concat(images_info, axis=0)
    # Ignored mask elements are set to zero for the argmax op.
masks = tf.where(valid_masks, masks, tf.zeros_like(masks))
if self._rescale_predictions:
# This part can only run on cpu/gpu due to dynamic image resizing.
for i in range(tf.shape(predictions)[0]):
mask = masks[i]
valid_mask = valid_masks[i]
predicted_mask = predictions[i]
image_info = images_info[i]
rescale_size = tf.cast(
tf.math.ceil(image_info[1, :] / image_info[2, :]), tf.int32)
image_shape = tf.cast(image_info[0, :], tf.int32)
offsets = tf.cast(image_info[3, :], tf.int32)
predicted_mask = tf.image.resize(
predicted_mask,
rescale_size,
method=tf.image.ResizeMethod.BILINEAR)
predicted_mask = tf.image.crop_to_bounding_box(predicted_mask,
offsets[0], offsets[1],
image_shape[0],
image_shape[1])
mask = tf.image.crop_to_bounding_box(mask, 0, 0, image_shape[0],
image_shape[1])
valid_mask = tf.image.crop_to_bounding_box(valid_mask, 0, 0,
image_shape[0],
image_shape[1])
predicted_mask = tf.argmax(predicted_mask, axis=2)
flatten_predictions = tf.reshape(predicted_mask, shape=[1, -1])
flatten_masks = tf.reshape(mask, shape=[1, -1])
flatten_valid_masks = tf.reshape(valid_mask, shape=[1, -1])
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
else:
predictions = tf.image.resize(
predictions,
tf.shape(masks)[1:3],
method=tf.image.ResizeMethod.BILINEAR)
predictions = tf.argmax(predictions, axis=3)
flatten_predictions = tf.reshape(predictions, shape=[-1])
flatten_masks = tf.reshape(masks, shape=[-1])
flatten_valid_masks = tf.reshape(valid_masks, shape=[-1])
super().update_state(flatten_masks, flatten_predictions,
tf.cast(flatten_valid_masks, tf.float32))
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for segmentation_metrics."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from official.vision.evaluation import segmentation_metrics
class SegmentationMetricsTest(parameterized.TestCase, tf.test.TestCase):
def _create_test_data(self):
y_pred_cls0 = np.expand_dims(
np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]], dtype=np.uint16),
axis=(0, -1))
y_pred_cls1 = np.expand_dims(
np.array([[0, 0, 0], [0, 0, 1], [0, 0, 1]], dtype=np.uint16),
axis=(0, -1))
y_pred = np.concatenate((y_pred_cls0, y_pred_cls1), axis=-1)
y_true = {
'masks':
np.expand_dims(
np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 1, 1],
[0, 0, 0, 1, 1, 1], [0, 0, 0, 1, 1, 1]],
dtype=np.uint16),
axis=(0, -1)),
'valid_masks':
np.ones([1, 6, 6, 1], dtype=np.uint16),
'image_info':
np.array([[[6, 6], [3, 3], [0.5, 0.5], [0, 0]]], dtype=np.float32)
}
return y_pred, y_true
@parameterized.parameters(True, False)
def test_mean_iou_metric(self, rescale_predictions):
    tf.config.run_functions_eagerly(True)
mean_iou_metric = segmentation_metrics.MeanIoU(
num_classes=2, rescale_predictions=rescale_predictions)
y_pred, y_true = self._create_test_data()
# Disable autograph for correct coverage statistics.
update_fn = tf.autograph.experimental.do_not_convert(
mean_iou_metric.update_state)
update_fn(y_true=y_true, y_pred=y_pred)
miou = mean_iou_metric.result()
self.assertAlmostEqual(miou.numpy(), 0.762, places=3)
@parameterized.parameters(True, False)
def test_per_class_mean_iou_metric(self, rescale_predictions):
per_class_iou_metric = segmentation_metrics.PerClassIoU(
num_classes=2, rescale_predictions=rescale_predictions)
y_pred, y_true = self._create_test_data()
# Disable autograph for correct coverage statistics.
update_fn = tf.autograph.experimental.do_not_convert(
per_class_iou_metric.update_state)
update_fn(y_true=y_true, y_pred=y_pred)
per_class_miou = per_class_iou_metric.result()
self.assertAllClose(per_class_miou.numpy(), [0.857, 0.667], atol=1e-3)
if __name__ == '__main__':
tf.test.main()
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""2D detection evaluator for the Waymo Open Dataset."""
import pprint
from absl import logging
import tensorflow as tf
from official.vision.ops import box_ops
from waymo_open_dataset import label_pb2
from waymo_open_dataset.metrics.python import wod_detection_evaluator
from waymo_open_dataset.protos import breakdown_pb2
from waymo_open_dataset.protos import metrics_pb2
def get_2d_detection_default_config():
"""Returns the config proto for WOD 2D detection Evaluation."""
config = metrics_pb2.Config()
config.breakdown_generator_ids.append(breakdown_pb2.Breakdown.OBJECT_TYPE)
difficulty = config.difficulties.add()
difficulty.levels.append(label_pb2.Label.LEVEL_1)
difficulty.levels.append(label_pb2.Label.LEVEL_2)
config.breakdown_generator_ids.append(breakdown_pb2.Breakdown.ALL_BUT_SIGN)
difficulty = config.difficulties.add()
difficulty.levels.append(label_pb2.Label.LEVEL_1)
difficulty.levels.append(label_pb2.Label.LEVEL_2)
config.matcher_type = metrics_pb2.MatcherProto.TYPE_HUNGARIAN
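  # Per-type IoU thresholds, indexed by label_pb2.Label.Type:
  # [UNKNOWN, VEHICLE, PEDESTRIAN, SIGN, CYCLIST].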
config.iou_thresholds.append(0.0)
config.iou_thresholds.append(0.7)
config.iou_thresholds.append(0.5)
config.iou_thresholds.append(0.5)
config.iou_thresholds.append(0.5)
config.box_type = label_pb2.Label.Box.TYPE_2D
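  # Sweep score cutoffs 0.00, 0.01, ..., 0.99, plus 1.0, for the PR curve.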
for i in range(100):
config.score_cutoffs.append(i * 0.01)
config.score_cutoffs.append(1.0)
return config
class WOD2dDetectionEvaluator(wod_detection_evaluator.WODDetectionEvaluator):
"""WOD 2D detection evaluation metric class."""
def __init__(self, config=None):
if config is None:
config = get_2d_detection_default_config()
super().__init__(config=config)
def _remove_padding(self, tensor_dict, num_valid):
"""Remove the paddings of the prediction/groundtruth data."""
result_tensor_dict = {}
gather_indices = tf.range(num_valid)
for k, v in tensor_dict.items():
if 'frame_id' in k:
result_tensor_dict[k] = tf.tile([v], [num_valid])
else:
result_tensor_dict[k] = tf.gather(v, gather_indices)
return result_tensor_dict
def update_state(self, groundtruths, predictions):
"""Update the metrics state with prediction and groundtruth data.
Args:
groundtruths: a dictionary of Tensors including the fields below.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- num_detections: a numpy array of int of shape [batch_size].
- boxes: a numpy array of float of shape [batch_size, K, 4].
- classes: a numpy array of int of shape [batch_size, K].
- difficulties: a numpy array of int of shape [batch_size, K].
predictions: a dictionary of tensors including the fields below.
Required fields:
- source_id: a numpy array of int or string of shape [batch_size].
- image_info: a numpy array of float of shape [batch_size, 4, 2].
- num_detections: a numpy array of int of shape [batch_size].
- detection_boxes: a numpy array of float of shape [batch_size, K, 4].
- detection_classes: a numpy array of int of shape [batch_size, K].
- detection_scores: a numpy array of float of shape [batch_size, K].
"""
# Preprocess potentially aggregated tensors.
for k, v in groundtruths.items():
if isinstance(v, tuple):
groundtruths[k] = tf.concat(v, axis=0)
for k, v in predictions.items():
if isinstance(v, tuple):
predictions[k] = tf.concat(v, axis=0)
    # Change cyclists' type id from 3 to 4, since 3 is reserved for signs.
groundtruth_type = tf.cast(groundtruths['classes'], tf.uint8)
groundtruth_type = tf.where(
tf.equal(groundtruth_type, 3),
tf.ones_like(groundtruth_type) * 4, groundtruth_type)
prediction_type = tf.cast(predictions['detection_classes'], tf.uint8)
prediction_type = tf.where(
tf.equal(prediction_type, 3),
tf.ones_like(prediction_type) * 4, prediction_type)
# Rescale the detection boxes back to original scale.
image_scale = tf.tile(predictions['image_info'][:, 2:3, :], (1, 1, 2))
prediction_bbox = predictions['detection_boxes'] / image_scale
batch_size = tf.shape(groundtruths['source_id'])[0]
for i in tf.range(batch_size):
frame_groundtruths = {
'ground_truth_frame_id':
groundtruths['source_id'][i],
'ground_truth_bbox':
box_ops.yxyx_to_cycxhw(
tf.cast(groundtruths['boxes'][i], tf.float32)),
'ground_truth_type':
groundtruth_type[i],
'ground_truth_difficulty':
tf.cast(groundtruths['difficulties'][i], tf.uint8),
}
frame_groundtruths = self._remove_padding(
frame_groundtruths, groundtruths['num_detections'][i])
frame_predictions = {
'prediction_frame_id':
groundtruths['source_id'][i],
'prediction_bbox':
box_ops.yxyx_to_cycxhw(
tf.cast(prediction_bbox[i], tf.float32)),
'prediction_type':
prediction_type[i],
'prediction_score':
tf.cast(predictions['detection_scores'][i], tf.float32),
'prediction_overlap_nlz':
tf.zeros_like(predictions['detection_scores'][i], dtype=tf.bool)
}
frame_predictions = self._remove_padding(frame_predictions,
predictions['num_detections'][i])
super().update_state(frame_groundtruths, frame_predictions)
def evaluate(self):
"""Compute the final metrics."""
ap, _, _, _, _ = super().evaluate()
metric_dict = {}
for i, name in enumerate(self._breakdown_names):
# Skip sign metrics in 2d detection task.
if 'SIGN' in name:
continue
metric_dict['WOD metrics/{}/AP'.format(name)] = ap[i]
pp = pprint.PrettyPrinter()
logging.info('WOD Detection Metrics: \n %s', pp.pformat(metric_dict))
return metric_dict
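# Example usage (a sketch; the batch contents are illustrative assumptions,
# not part of this module):
#
#   evaluator = WOD2dDetectionEvaluator()
#   for groundtruths, predictions in eval_batches:
#     evaluator.update_state(groundtruths, predictions)
#   metrics = evaluator.evaluate()  # {'WOD metrics/<breakdown>/AP': ...}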
@@ -35,14 +35,14 @@ input and return an `ExampleModel` instance, similar to
As a simple example, we define a single model. However, you can split the
model implementation into individual components, such as backbones, decoders,
and heads, as we do
[here](https://github.com/tensorflow/models/blob/master/official/vision/beta/modeling).
[here](https://github.com/tensorflow/models/blob/master/official/vision/modeling).
Then, in the `build_example_model` function, you can hook these components
together to obtain your full model.
## Create Dataloader
A dataloader reads, decodes and parses the input data. We have created various
[dataloaders](https://github.com/tensorflow/models/blob/master/official/vision/beta/dataloaders)
[dataloaders](https://github.com/tensorflow/models/blob/master/official/vision/dataloaders)
to handle standard input formats for classification, detection and segmentation.
If you have non-standard or complex data, you may want to create your own
dataloader, which consists of a `Decoder` and a `Parser`, as sketched below.
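A minimal sketch of such a pair might look like the following (the tf.Example
feature keys are illustrative assumptions, not a prescribed schema):

```python
import tensorflow as tf

from official.vision.dataloaders import decoder
from official.vision.dataloaders import parser


class ExampleDecoder(decoder.Decoder):
  """Deserializes a tf.Example proto into a dictionary of tensors."""

  def decode(self, serialized_example):
    features = {
        'image/encoded': tf.io.FixedLenFeature((), tf.string),
        'image/class/label': tf.io.FixedLenFeature((), tf.int64),
    }
    return tf.io.parse_single_example(serialized_example, features)


class ExampleParser(parser.Parser):
  """Turns decoded tensors into (image, label) model inputs."""

  def _parse_train_data(self, decoded_tensors):
    image = tf.io.decode_image(decoded_tensors['image/encoded'], channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Training-time augmentations would go here.
    return image, decoded_tensors['image/class/label']

  def _parse_eval_data(self, decoded_tensors):
    image = tf.io.decode_image(decoded_tensors['image/encoded'], channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, decoded_tensors['image/class/label']
```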
@@ -123,10 +123,10 @@ together and is called by the base
You can create your own task by inheriting from the base
[Task](https://github.com/tensorflow/models/blob/master/official/core/base_task.py),
or from one of the
[tasks](https://github.com/tensorflow/models/blob/master/official/vision/beta/tasks/)
[tasks](https://github.com/tensorflow/models/blob/master/official/vision/tasks/)
we already defined, if most of the operations can be reused. An `ExampleTask`
inheriting from
[ImageClassificationTask](https://github.com/tensorflow/models/blob/master/official/vision/beta/tasks/image_classification.py#L32)
[ImageClassificationTask](https://github.com/tensorflow/models/blob/master/official/vision/tasks/image_classification.py#L32)
can be found
[here](example_task.py).
We will go through each important component of the task in the sections below.
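For instance, a minimal registered task might look like this (the
`ExampleTaskConfig` class is a hypothetical stand-in for your experiment
config):

```python
import dataclasses

from official.core import task_factory
from official.vision.configs import image_classification as ic_cfg
from official.vision.tasks import image_classification


@dataclasses.dataclass
class ExampleTaskConfig(ic_cfg.ImageClassificationTask):
  """Hypothetical task config extending the stock classification config."""


@task_factory.register_task_cls(ExampleTaskConfig)
class ExampleTask(image_classification.ImageClassificationTask):
  """Reuses image classification behavior; override methods as needed."""
```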
@@ -175,7 +175,7 @@ from official.vision.beta.projects.example import example_task
## Training
You can create your own trainer by branching from our core
[trainer](https://github.com/tensorflow/models/blob/master/official/vision/beta/train.py).
[trainer](https://github.com/tensorflow/models/blob/master/official/vision/train.py).
Just make sure you import the registry like this:
@@ -185,7 +185,7 @@
```python
from official.vision.beta.projects.example import registry_imports  # pylint: disable=unused-import
```
You can run training locally for testing purposes:
```bash
# Assume you are under official/vision/beta/projects.
# Assume you are under official/vision/projects.
python3 example/train.py \
--experiment=tf_vision_example_experiment \
--config_file=${PWD}/example/example_config_local.yaml \
@@ -210,5 +210,5 @@ python3 example/train.py \
--mode=train \
--tpu=$TPU_NAME \
    --model_dir=/tmp/tfvision_test/ \
--config_file=third_party/tensorflow_models/official/vision/beta/projects/example/example_config_tpu.yaml
--config_file=third_party/tensorflow_models/official/vision/examples/starter/example_config_tpu.yaml
```
@@ -13,9 +13,8 @@
# limitations under the License.
"""Example experiment configuration definition."""
from typing import List
import dataclasses
from typing import List
from official.core import config_definitions as cfg
from official.core import exp_factory
......
@@ -22,9 +22,9 @@ from typing import Mapping, List, Tuple
# Import libraries
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
from official.vision.beta.dataloaders import parser
from official.vision.beta.ops import preprocess_ops
from official.vision.dataloaders import decoder
from official.vision.dataloaders import parser
from official.vision.ops import preprocess_ops
MEAN_RGB = (0.485 * 255, 0.456 * 255, 0.406 * 255)
STDDEV_RGB = (0.229 * 255, 0.224 * 255, 0.225 * 255)
......
@@ -22,7 +22,7 @@ directly used from `official/vision/beta/modeling` directory.
from typing import Any, Mapping
# Import libraries
import tensorflow as tf
from official.vision.beta.projects.example import example_config as example_cfg
from official.vision.examples.starter import example_config as example_cfg
class ExampleModel(tf.keras.Model):
......
@@ -21,10 +21,10 @@ from official.common import dataset_fn
from official.core import base_task
from official.core import task_factory
from official.modeling import tf_utils
from official.vision.beta.dataloaders import input_reader_factory
from official.vision.beta.projects.example import example_config as exp_cfg
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
from official.vision.dataloaders import input_reader_factory
from official.vision.examples.starter import example_config as exp_cfg
from official.vision.examples.starter import example_input
from official.vision.examples.starter import example_model
@task_factory.register_task_cls(exp_cfg.ExampleTask)
......
@@ -21,7 +21,7 @@ to handle each file separately.
# pylint: disable=unused-import
from official.common import registry_imports
from official.vision.beta.projects.example import example_config
from official.vision.beta.projects.example import example_input
from official.vision.beta.projects.example import example_model
from official.vision.beta.projects.example import example_task
from official.vision.examples.starter import example_config
from official.vision.examples.starter import example_input
from official.vision.examples.starter import example_model
from official.vision.examples.starter import example_task