# evaluation.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions to perform COCO evaluation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import operator
import pprint
import six
import time

import io
from PIL import Image

import numpy as np
import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn import coco_metric
from mask_rcnn.utils import coco_utils

from mask_rcnn.object_detection import visualization_utils

import dllogger
from dllogger import Verbosity


def process_prediction_for_eval(prediction):
    """Convert detection boxes to the COCO ``[x, y, w, h]`` layout, in place.

    Every box in ``prediction['detection_boxes']`` is read as
    ``[y1, x1, y2, x2]``, rewritten as ``[x1, y1, x2 - x1, y2 - y1]`` and
    multiplied by the per-image scale found at index 2 of the matching row
    of ``prediction['image_info']``.

    Args:
      prediction: dict holding at least ``'detection_boxes'``, an array
        indexed as ``[image, box, coordinate]``, and ``'image_info'``,
        indexed as ``[image, field]``.

    Returns:
      The same ``prediction`` dict. Its ``'detection_boxes'`` entry is
      replaced by a new array with the shape and dtype of the input boxes.
    """
    boxes = prediction['detection_boxes']
    num_images = boxes.shape[0]

    # One scale per image, shaped (num_images, 1) so that it broadcasts over
    # the box axis instead of looping over every box in Python.
    scales = np.asarray(prediction['image_info'])[:num_images, 2]
    scales = scales.reshape(-1, 1)

    y1 = boxes[..., 0]
    x1 = boxes[..., 1]
    y2 = boxes[..., 2]
    x2 = boxes[..., 3]

    # Assigning into a zeros_like buffer keeps the input dtype.
    processed_boxes = np.zeros_like(boxes)
    processed_boxes[..., 0] = scales * x1
    processed_boxes[..., 1] = scales * y1
    processed_boxes[..., 2] = scales * (x2 - x1)
    processed_boxes[..., 3] = scales * (y2 - y1)

    prediction['detection_boxes'] = processed_boxes
    return prediction


def _latency_at_fraction(sorted_latencies, fraction):
    """Return the slowest latency among the fastest ``fraction`` of steps.

    Args:
      sorted_latencies: list of per-step latencies sorted in ascending order.
      fraction: share of the steps to consider, e.g. ``0.95``.

    Returns:
      The largest value among the first ``int(len * fraction)`` entries. At
      least one entry is always considered so short runs do not fail;
      ``nan`` is returned when the list is empty.
    """
    if not sorted_latencies:
        return float('nan')

    count = max(int(len(sorted_latencies) * fraction), 1)
    return sorted_latencies[count - 1]


def compute_coco_eval_metric(predictor,
                             num_batches=-1,
                             include_mask=True,
                             annotation_json_file="",
                             eval_batch_size=-1,
                             report_frequency=None):
    """Compute COCO eval metric given a prediction generator.

    Args:
      predictor: a generator that iteratively pops a dictionary of predictions
        with the format compatible with COCO eval tool.
      num_batches: the number of batches to be aggregated in eval. This is how
        many times that the predictor gets pulled. A negative value pulls
        until the predictor raises StopIteration.
      include_mask: a boolean that indicates whether we include the mask eval.
      annotation_json_file: the annotation json file of the eval dataset. An
        empty string means the groundtruth is extracted from the predictions.
      eval_batch_size: number of images per batch; only used to turn the
        measured step time into a throughput figure.
      report_frequency: if truthy, intermediate eval results are computed and
        logged every `report_frequency` batches.

    Returns:
      A tuple `(eval_results, predictions)`: the aggregated COCO metric eval
      results, and a dict mapping each prediction key to the list of
      per-batch values that were collected.
    """

    if annotation_json_file == "":
        annotation_json_file = None

    use_groundtruth_from_json = (annotation_json_file is not None)

    predictions = dict()
    batch_idx = 0

    if use_groundtruth_from_json:
        eval_metric = coco_metric.EvaluationMetric(annotation_json_file, include_mask=include_mask)

    else:
        eval_metric = coco_metric.EvaluationMetric(filename=None, include_mask=include_mask)

    def evaluation_preds(preds):
        # np.concatenate allocates new arrays, so building a fresh dict leaves
        # the accumulated per-batch lists in `preds` untouched without paying
        # for a deep copy of every prediction first.
        _preds = {k: np.concatenate(v, axis=0) for k, v in preds.items()}

        if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
            # Only samples a few images for visualization.
            _preds['orig_images'] = _preds['orig_images'][:10]

        if use_groundtruth_from_json:
            eval_results = eval_metric.predict_metric_fn(_preds)

        else:
            images, annotations = coco_utils.extract_coco_groundtruth(_preds, include_mask)
            coco_dataset = coco_utils.create_coco_format_dataset(images, annotations)
            eval_results = eval_metric.predict_metric_fn(_preds, groundtruth_data=coco_dataset)

        return eval_results

    # Take into account cuDNN & Tensorflow warmup
    # Drop N first steps for avg throughput calculation
    BURNIN_STEPS = 100
    model_throughput_list = list()
    inference_time_list = list()

    while num_batches < 0 or batch_idx < num_batches:

        step_t0 = time.time()

        try:
            step_predictions = next(predictor)

        except StopIteration:
            logging.info('Get StopIteration at %d batch.' % (batch_idx + 1))
            break

        batch_time = time.time() - step_t0

        # Guard against a zero-length measurement on coarse clocks.
        throughput = eval_batch_size / batch_time if batch_time > 0 else float('inf')
        model_throughput_list.append(throughput)
        inference_time_list.append(batch_time)

        logging.info('Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s' % (
            batch_idx + 1,
            num_batches,
            batch_time,
            throughput
        ))

        step_predictions = process_prediction_for_eval(step_predictions)

        for k, v in step_predictions.items():
            predictions.setdefault(k, []).append(v)

        batch_idx = batch_idx + 1

        # Optionally report intermediate results every `report_frequency`
        # batches, i.e. every eval_batch_size * report_frequency samples.
        if report_frequency and batch_idx % report_frequency == 0:
            eval_results = evaluation_preds(preds=predictions)
            logging.info('Eval results: %s' % pprint.pformat(eval_results, indent=4))

    # Sorted ascending so that the percentile helper can index directly.
    inference_time_list.sort()
    eval_results = evaluation_preds(preds=predictions)

    average_time = np.mean(inference_time_list) if inference_time_list else float('nan')
    latency_50 = _latency_at_fraction(inference_time_list, 0.5)
    latency_90 = _latency_at_fraction(inference_time_list, 0.90)
    latency_95 = _latency_at_fraction(inference_time_list, 0.95)
    latency_99 = _latency_at_fraction(inference_time_list, 0.99)
    latency_100 = _latency_at_fraction(inference_time_list, 1)

    print()  # Visual Spacing
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
    logging.info("         Evaluation Performance Summary          ")
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")

    # The total processing time is the sum of the measured step times (the
    # throughput list holds images/second and must not be summed as seconds).
    total_seconds = float(np.sum(inference_time_list))
    total_processing_hours, rem = divmod(total_seconds, 3600)
    total_processing_minutes, total_processing_seconds = divmod(rem, 60)

    # More than BURNIN_STEPS + 1 steps are needed, because both the warmup
    # steps and the last step are excluded from the average.
    if len(model_throughput_list) > BURNIN_STEPS + 1:
        # Take into account cuDNN & Tensorflow warmup
        # Drop N first steps for avg throughput calculation
        # Also drop last step which may have a different batch size
        avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
    else:
        avg_throughput = -1.

    print()  # Visual Spacing
    logging.info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
    logging.info("Inference Latency Average (s) = {avg:.4f}".format(avg=average_time))
    logging.info("Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
    logging.info("Inference Latency 90%  (s) = {cf_90:.4f}".format(cf_90=latency_90))
    logging.info("Inference Latency 95%  (s) = {cf_95:.4f}".format(cf_95=latency_95))
    logging.info("Inference Latency 99%  (s) = {cf_99:.4f}".format(cf_99=latency_99))
    logging.info("Inference Latency 100%  (s) = {cf_100:.4f}".format(cf_100=latency_100))
    logging.info("Total processed steps: {total_steps}".format(total_steps=len(model_throughput_list)))
    logging.info(
        "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
            hours=int(total_processing_hours),
            minutes=int(total_processing_minutes),
            seconds=int(total_processing_seconds)
        )
    )
    dllogger.log(step=(), data={"avg_inference_throughput": avg_throughput}, verbosity=Verbosity.DEFAULT)
    # NOTE(review): despite the key name, the value logged here is the total
    # processing time in whole seconds, not a per-step average. Kept as is;
    # confirm what downstream consumers of this key expect.
    avg_inference_time = float(int(total_processing_hours) * 3600 + int(total_processing_minutes) * 60 +
        int(total_processing_seconds))
    dllogger.log(step=(), data={"avg_inference_time": avg_inference_time}, verbosity=Verbosity.DEFAULT)
    logging.info("==================== Metrics ====================")

    for key, value in sorted(eval_results.items(), key=operator.itemgetter(0)):
        logging.info("%s: %.9f" % (key, value))
    print()  # Visual Spacing

    return eval_results, predictions


def evaluate(eval_estimator,
             input_fn,
             num_eval_samples,
             eval_batch_size,
             include_mask=True,
             validation_json_file="",
             report_frequency=None):
    """Run a single COCO evaluation pass with the given estimator.

    Args:
      eval_estimator: object whose `predict(input_fn=..., yield_single_examples=False)`
        returns the prediction iterator to evaluate.
      input_fn: input function forwarded to `eval_estimator.predict`.
      num_eval_samples: number of samples to evaluate.
      eval_batch_size: number of samples per prediction batch.
      include_mask: whether the mask eval is included.
      validation_json_file: annotation json file of the eval dataset.
      report_frequency: forwarded to `compute_coco_eval_metric`.

    Returns:
      The `(eval_results, predictions)` pair produced by
      `compute_coco_eval_metric`.
    """
    prediction_batches = eval_estimator.predict(input_fn=input_fn, yield_single_examples=False)

    # Each pull from the iterator yields one batch (a dictionary), so only
    # the whole batches that fit in num_eval_samples are requested.
    batches_to_run = num_eval_samples // eval_batch_size
    assert batches_to_run > 0, 'num_eval_samples must be >= eval_batch_size!'

    metrics, collected_predictions = compute_coco_eval_metric(
        predictor=prediction_batches,
        num_batches=batches_to_run,
        include_mask=include_mask,
        annotation_json_file=validation_json_file,
        eval_batch_size=eval_batch_size,
        report_frequency=report_frequency
    )

    return metrics, collected_predictions


def write_summary(eval_results, summary_dir, current_step, predictions=None):
    """Write out eval results for the checkpoint.

    Scalar metrics are written to an event file in `summary_dir` and logged
    through dllogger. When `predictions` is a non-empty dict, image previews
    from `get_image_summary` are written as well.

    Args:
      eval_results: mapping from metric name to a value convertible to float.
      summary_dir: directory the summary writer writes to.
      current_step: step the summaries are recorded at.
      predictions: optional dict of predictions used to build image
        summaries; anything that is not a non-empty dict is ignored.
    """
    with tf.Graph().as_default():
        summaries = []

        # Summary writer writes out eval metrics.
        try:
            # Tensorflow 1.x
            summary_writer = tf.compat.v1.summary.FileWriter(summary_dir)
        except AttributeError:
            # Tensorflow 2.x
            summary_writer = tf.summary.create_file_writer(summary_dir)
            # NOTE(review): as_default() returns a context manager that is
            # never entered here, so this call likely has no effect - confirm.
            summary_writer.as_default()

        eval_results_dict = {}
        for metric in eval_results:
            metric_value = eval_results[metric]

            try:
                summaries.append(tf.compat.v1.Summary.Value(tag=metric, simple_value=metric_value))
            except AttributeError:
                tf.summary.scalar(name=metric, data=metric_value, step=current_step)

            eval_results_dict[metric] = float(metric_value)

        dllogger.log(step=(), data=eval_results_dict, verbosity=Verbosity.DEFAULT)

        if isinstance(predictions, dict) and predictions:
            images_summary = get_image_summary(predictions, current_step)

            # get_image_summary returns None when the predictions carry no
            # 'orig_images'; there is nothing to add then, and a None entry
            # would make the Summary proto construction below fail.
            if images_summary is not None:
                try:
                    summaries += images_summary
                except TypeError:
                    # Not iterable: a single summary object.
                    summaries.append(images_summary)

        try:
            tf_summaries = tf.compat.v1.Summary(value=summaries)
            summary_writer.add_summary(tf_summaries, current_step)
            summary_writer.flush()
            # A new writer is created on every call, so release its event
            # file once the summaries are on disk.
            summary_writer.close()

        except AttributeError:
            tf.summary.flush(summary_writer)


def generate_image_preview(image, boxes, scores, classes, gt_boxes=None, segmentations=None):
    """Draw detections, and optionally groundtruth boxes, on an image.

    Args:
      image: image array handed to the visualization helper.
      boxes: detection boxes to draw, in absolute (non-normalized) coordinates.
      scores: detection scores matching `boxes`.
      classes: detection classes matching `boxes`.
      gt_boxes: optional groundtruth boxes drawn in a second pass.
      segmentations: optional instance masks for the detections.

    Returns:
      The result of the last call to
      `visualization_utils.visualize_boxes_and_labels_on_image_array`.
    """
    draw = visualization_utils.visualize_boxes_and_labels_on_image_array
    box_limit = 100
    score_floor = 0.1

    # First pass: the model detections.
    preview = draw(
        image,
        boxes,
        classes=classes,
        scores=scores,
        category_index={},
        instance_masks=segmentations,
        use_normalized_coordinates=False,
        max_boxes_to_draw=box_limit,
        min_score_thresh=score_floor,
        agnostic_mode=False
    )

    if gt_boxes is None:
        return preview

    # Second pass: the groundtruth boxes, class-agnostic and drawn on top of
    # the detections.
    return draw(
        preview,
        gt_boxes,
        classes=None,
        scores=None,
        category_index={},
        use_normalized_coordinates=False,
        max_boxes_to_draw=box_limit,
        agnostic_mode=True
    )


def generate_image_buffer(input_image, max_width=1024):
    """Encode an image array as PNG bytes, shrunk to fit `max_width`.

    Args:
      input_image: image array whose first two axes are (height, width);
        it is cast to uint8 before encoding.
      max_width: maximum width of the encoded image, in pixels. The aspect
        ratio is preserved. `Image.thumbnail` only shrinks, so smaller
        images keep their size.

    Returns:
      The PNG-encoded image as bytes.
    """
    # Array images are indexed (row, column): height comes first.
    height, width = input_image.shape[:2]
    ratio = max_width / width

    # PIL expects sizes as (width, height). Keep at least one pixel of height
    # so extreme aspect ratios do not produce an empty bounding box.
    bounding_box = (max_width, max(1, int(height * ratio)))

    image = Image.fromarray(input_image.astype(np.uint8))
    image.thumbnail(bounding_box)

    buf = io.BytesIO()
    image.save(buf, format='png')

    return buf.getvalue()


def get_image_summary(predictions, current_step, max_images=10):
    """Build image summaries showing predictions drawn on the input images.

    Args:
      predictions: dict mapping each prediction key to a list of per-batch
        arrays. 'detection_boxes', 'detection_scores', 'detection_classes',
        'image_info', 'num_detections' and 'orig_images' are read;
        'detection_masks' and 'groundtruth_boxes' are used when present.
      current_step: step recorded with the summary (only used by the
        `tf.summary.image` fallback).
      max_images: maximum number of images to render.

    Returns:
      A list of `tf.compat.v1.Summary.Value` image entries, or the result of
      `tf.summary.image` when the compat API is unavailable. Returns None
      when `predictions` has no 'orig_images' entry.
    """

    if 'orig_images' not in predictions:
        logging.info('Missing orig_images in predictions: %s', predictions.keys())
        return None

    _detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
    _detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
    _detection_classes = np.concatenate(predictions['detection_classes'], axis=0)
    _image_info = np.concatenate(predictions['image_info'], axis=0)
    _num_detections = np.concatenate(predictions['num_detections'], axis=0)
    _orig_images = np.concatenate(predictions['orig_images'], axis=0)

    if 'detection_masks' in predictions:
        _detection_masks = np.concatenate(predictions['detection_masks'], axis=0)
    else:
        _detection_masks = None

    if 'groundtruth_boxes' in predictions:
        _groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'], axis=0)
    else:
        _groundtruth_boxes = None

    # Use the real number of collected images rather than an estimate from
    # the first batch, so a smaller batch cannot cause an out-of-range index.
    max_images = max(0, min(_orig_images.shape[0], max_images))

    # Only the images that are rendered need the conversion to uint8.
    _orig_images = (_orig_images[:max_images] * 255).astype(np.uint8)

    image_previews = []

    for i in range(max_images):
        num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))

        detection_boxes = _detection_boxes[i][:num_detections]
        detection_scores = _detection_scores[i][:num_detections]
        detection_classes = _detection_classes[i][:num_detections]

        image = _orig_images[i]
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Rescale the box to fit the visualization image.
        # NOTE(review): image_info fields 3 and 4 are read as the height and
        # width the boxes are expressed in - confirm against the input
        # pipeline.
        info_height, info_width = _image_info[i][3:5]
        detection_boxes = detection_boxes / np.array([info_width, info_height, info_width, info_height])
        detection_boxes = detection_boxes * np.array([image_width, image_height, image_width, image_height])

        if _groundtruth_boxes is not None:
            # Scaled by the image size in [y, x, y, x] order.
            gt_boxes = _groundtruth_boxes[i]
            gt_boxes = gt_boxes * np.array([image_height, image_width, image_height, image_width])
        else:
            gt_boxes = None

        if _detection_masks is not None:
            instance_masks = _detection_masks[i][0:num_detections]
            segmentations = coco_metric.generate_segmentation_from_masks(
                instance_masks,
                detection_boxes,
                image_height,
                image_width
            )
        else:
            segmentations = None

        # From [x, y, w, h] to [x1, y1, x2, y2]. process_prediction_for_eval()
        # set the boxes to [x, y] order, so they are also reverted to the
        # [y, x] order used for drawing.
        xmin, ymin, box_width, box_height = np.split(detection_boxes, 4, axis=-1)
        xmax = xmin + box_width
        ymax = ymin + box_height

        boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)

        image_preview = generate_image_preview(
            image,
            boxes=boxes_to_visualize,
            scores=detection_scores,
            classes=detection_classes.astype(np.int32),
            gt_boxes=gt_boxes,
            segmentations=segmentations
        )
        image_previews.append(image_preview)

    try:
        summaries = []

        for i, image_preview in enumerate(image_previews):
            image_buffer = generate_image_buffer(image_preview)
            image_summary = tf.compat.v1.Summary.Image(encoded_image_string=image_buffer)
            image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i, image=image_summary)

            summaries.append(image_value)

    except AttributeError:
        # Fallback when the compat.v1 summary protos are unavailable.
        image_previews = np.array(image_previews)
        summaries = tf.summary.image(
            name='image_summary',
            data=image_previews,
            step=current_step,
            max_outputs=max_images
        )

    return summaries