Commit d0d91e12 authored by huchen

Merge branch 'tf2' into 'main'

tf2 detection

See merge request dcutoolkit/deeplearing/dlexamples_new!2
parents 2795dc1f c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Functions to perform COCO evaluation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import operator
import pprint
import six
import time
import io
from PIL import Image
import numpy as np
import tensorflow as tf
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn import coco_metric
from mask_rcnn.utils import coco_utils
from mask_rcnn.object_detection import visualization_utils
import dllogger
from dllogger import Verbosity
def process_prediction_for_eval(prediction):
"""Process the model prediction for COCO eval."""
image_info = prediction['image_info']
box_coordinates = prediction['detection_boxes']
processed_box_coordinates = np.zeros_like(box_coordinates)
for image_id in range(box_coordinates.shape[0]):
scale = image_info[image_id][2]
for box_id in range(box_coordinates.shape[1]):
# Map [y1, x1, y2, x2] -> [x1, y1, w, h] and multiply detections
# by image scale.
y1, x1, y2, x2 = box_coordinates[image_id, box_id, :]
new_box = scale * np.array([x1, y1, x2 - x1, y2 - y1])
processed_box_coordinates[image_id, box_id, :] = new_box
prediction['detection_boxes'] = processed_box_coordinates
return prediction
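# Worked example (hypothetical values): for a detection box
# [y1, x1, y2, x2] = [10., 20., 50., 80.] and an image scale of 2.0,
# the mapping above produces scale * [x1, y1, x2 - x1, y2 - y1]
# = 2.0 * [20., 10., 60., 40.] = [40., 20., 120., 80.],
# i.e. the [x, y, width, height] layout expected by the COCO evaluation tools.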
def compute_coco_eval_metric(predictor,
num_batches=-1,
include_mask=True,
annotation_json_file="",
eval_batch_size=-1,
report_frequency=None):
"""Compute COCO eval metric given a prediction generator.
Args:
predictor: a generator that iteratively pops a dictionary of predictions
with the format compatible with COCO eval tool.
num_batches: the number of batches to be aggregated in eval. This is how
many times that the predictor gets pulled.
include_mask: a boolean that indicates whether we include the mask eval.
annotation_json_file: the annotation json file of the eval dataset.
Returns:
eval_results: the aggregated COCO metric eval results.
"""
if annotation_json_file == "":
annotation_json_file = None
use_groundtruth_from_json = (annotation_json_file is not None)
predictions = dict()
batch_idx = 0
if use_groundtruth_from_json:
eval_metric = coco_metric.EvaluationMetric(annotation_json_file, include_mask=include_mask)
else:
eval_metric = coco_metric.EvaluationMetric(filename=None, include_mask=include_mask)
def evaluation_preds(preds):
# Essential to avoid modifying the source dict
_preds = copy.deepcopy(preds)
for k, v in six.iteritems(_preds):
_preds[k] = np.concatenate(_preds[k], axis=0)
if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
# Only samples a few images for visualization.
_preds['orig_images'] = _preds['orig_images'][:10]
if use_groundtruth_from_json:
eval_results = eval_metric.predict_metric_fn(_preds)
else:
images, annotations = coco_utils.extract_coco_groundtruth(_preds, include_mask)
coco_dataset = coco_utils.create_coco_format_dataset(images, annotations)
eval_results = eval_metric.predict_metric_fn(_preds, groundtruth_data=coco_dataset)
return eval_results
# Take into account cuDNN & Tensorflow warmup
# Drop N first steps for avg throughput calculation
BURNIN_STEPS = 100
model_throughput_list = list()
inference_time_list = list()
while num_batches < 0 or batch_idx < num_batches:
try:
step_t0 = time.time()
step_predictions = six.next(predictor)
batch_time = time.time() - step_t0
throughput = eval_batch_size / batch_time
model_throughput_list.append(throughput)
inference_time_list.append(batch_time)
logging.info('Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s' % (
batch_idx + 1,
num_batches,
batch_time,
throughput
))
except StopIteration:
logging.info('Got StopIteration at batch %d.' % (batch_idx + 1))
break
step_predictions = process_prediction_for_eval(step_predictions)
for k, v in step_predictions.items():
if k not in predictions:
predictions[k] = [v]
else:
predictions[k].append(v)
batch_idx = batch_idx + 1
# Optionally report intermediate eval results every `report_frequency` batches.
# Each such report therefore covers eval_batch_size * report_frequency samples.
if report_frequency and batch_idx % report_frequency == 0:
eval_results = evaluation_preds(preds=predictions)
logging.info('Eval results: %s' % pprint.pformat(eval_results, indent=4))
inference_time_list.sort()
eval_results = evaluation_preds(preds=predictions)
average_time = np.mean(inference_time_list)
latency_50 = max(inference_time_list[:int(len(inference_time_list) * 0.5)])
latency_90 = max(inference_time_list[:int(len(inference_time_list) * 0.90)])
latency_95 = max(inference_time_list[:int(len(inference_time_list) * 0.95)])
latency_99 = max(inference_time_list[:int(len(inference_time_list) * 0.99)])
latency_100 = max(inference_time_list[:int(len(inference_time_list) * 1)])
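# Since inference_time_list is sorted in ascending order, the max over the
# first q-fraction of entries is the q-th percentile latency. For example
# (hypothetical count), with 200 recorded batch times latency_90 is the
# largest of the 180 fastest batches.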
print() # Visual Spacing
logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
logging.info(" Evaluation Performance Summary ")
logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
# Total processing time is the sum of per-batch inference times, in seconds.
total_processing_hours, rem = divmod(np.sum(inference_time_list), 3600)
total_processing_minutes, total_processing_seconds = divmod(rem, 60)
if len(model_throughput_list) > BURNIN_STEPS:
# Take into account cuDNN & Tensorflow warmup
# Drop N first steps for avg throughput calculation
# Also drop last step which may have a different batch size
avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
else:
avg_throughput = -1.
print() # Visual Spacing
logging.info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
logging.info("Inference Latency Average (s) = {avg:.4f}".format(avg=average_time))
logging.info("Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
logging.info("Inference Latency 90% (s) = {cf_90:.4f}".format(cf_90=latency_90))
logging.info("Inference Latency 95% (s) = {cf_95:.4f}".format(cf_95=latency_95))
logging.info("Inference Latency 99% (s) = {cf_99:.4f}".format(cf_99=latency_99))
logging.info("Inference Latency 100% (s) = {cf_100:.4f}".format(cf_100=latency_100))
logging.info("Total processed steps: {total_steps}".format(total_steps=len(model_throughput_list)))
logging.info(
"Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
hours=total_processing_hours,
minutes=int(total_processing_minutes),
seconds=int(total_processing_seconds)
)
)
dllogger.log(step=(), data={"avg_inference_throughput": avg_throughput}, verbosity=Verbosity.DEFAULT)
avg_inference_time = float(total_processing_hours * 3600 + int(total_processing_minutes) * 60 +
int(total_processing_seconds))
dllogger.log(step=(), data={"avg_inference_time": avg_inference_time}, verbosity=Verbosity.DEFAULT)
logging.info("==================== Metrics ====================")
# logging.info('Eval Epoch results: %s' % pprint.pformat(eval_results, indent=4))
for key, value in sorted(eval_results.items(), key=operator.itemgetter(0)):
logging.info("%s: %.9f" % (key, value))
print() # Visual Spacing
return eval_results, predictions
def evaluate(eval_estimator,
input_fn,
num_eval_samples,
eval_batch_size,
include_mask=True,
validation_json_file="",
report_frequency=None):
"""Runs COCO evaluation once."""
predictor = eval_estimator.predict(
input_fn=input_fn,
yield_single_examples=False
)
# Every predictor.next() gets a batch of prediction (a dictionary).
num_eval_times = num_eval_samples // eval_batch_size
assert num_eval_times > 0, 'num_eval_samples must be >= eval_batch_size!'
eval_results, predictions = compute_coco_eval_metric(
predictor,
num_eval_times,
include_mask,
validation_json_file,
eval_batch_size=eval_batch_size,
report_frequency=report_frequency
)
return eval_results, predictions
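# Usage sketch (hypothetical estimator, input_fn and file paths):
#   eval_results, predictions = evaluate(
#       eval_estimator=estimator,
#       input_fn=eval_input_fn,
#       num_eval_samples=5000,
#       eval_batch_size=8,
#       include_mask=True,
#       validation_json_file='/data/annotations/instances_val2017.json'
#   )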
def write_summary(eval_results, summary_dir, current_step, predictions=None):
"""Write out eval results for the checkpoint."""
with tf.Graph().as_default():
summaries = []
# Summary writer writes out eval metrics.
try:
# Tensorflow 1.x
summary_writer = tf.compat.v1.summary.FileWriter(summary_dir)
except AttributeError:
# Tensorflow 2.x
summary_writer = tf.summary.create_file_writer(summary_dir)
summary_writer.as_default()
eval_results_dict = {}
for metric in eval_results:
try:
summaries.append(tf.compat.v1.Summary.Value(tag=metric, simple_value=eval_results[metric]))
eval_results_dict[metric] = float(eval_results[metric])
except AttributeError:
tf.summary.scalar(name=metric, data=eval_results[metric], step=current_step)
eval_results_dict[metric] = float(eval_results[metric])
dllogger.log(step=(), data=eval_results_dict, verbosity=Verbosity.DEFAULT)
if isinstance(predictions, dict) and predictions:
images_summary = get_image_summary(predictions, current_step)
try:
summaries += images_summary
except TypeError:
summaries.append(images_summary)
try:
# tf_summaries = tf.compat.v1.Summary(value=list(summaries))
tf_summaries = tf.compat.v1.Summary(value=summaries)
summary_writer.add_summary(tf_summaries, current_step)
summary_writer.flush()
except AttributeError:
tf.summary.flush(summary_writer)
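# Usage sketch (hypothetical summary directory and step): after an eval pass,
#   write_summary(eval_results, summary_dir='/results/eval', current_step=90000, predictions=predictions)
# writes a scalar summary for every COCO metric and, when predictions are
# provided, image previews of the detections.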
def generate_image_preview(image, boxes, scores, classes, gt_boxes=None, segmentations=None):
"""Creates an image summary given predictions."""
max_boxes_to_draw = 100
min_score_thresh = 0.1
# Visualizes the predictions.
image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
image,
boxes,
classes=classes,
scores=scores,
category_index={},
instance_masks=segmentations,
use_normalized_coordinates=False,
max_boxes_to_draw=max_boxes_to_draw,
min_score_thresh=min_score_thresh,
agnostic_mode=False
)
if gt_boxes is not None:
# Visualizes the groundtruth boxes. They are in black by default.
image_with_detections = visualization_utils.visualize_boxes_and_labels_on_image_array(
image_with_detections,
gt_boxes,
classes=None,
scores=None,
category_index={},
use_normalized_coordinates=False,
max_boxes_to_draw=max_boxes_to_draw,
agnostic_mode=True
)
return image_with_detections
def generate_image_buffer(input_image):
buf = io.BytesIO()
w, h = input_image.shape[:2]
ratio = 1024 / w
new_size = [int(w * ratio), int(h * ratio)]
image = Image.fromarray(input_image.astype(np.uint8))
image.thumbnail(new_size)
image.save(buf, format='png')
return buf.getvalue()
def get_image_summary(predictions, current_step, max_images=10):
"""Write out image and prediction for summary."""
if 'orig_images' not in predictions:
logging.info('Missing orig_images in predictions: %s', predictions.keys())
return
max_images = min(
len(predictions['orig_images']) * predictions['orig_images'][0].shape[0],
max_images
)
_detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
_detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
_detection_classes = np.concatenate(predictions['detection_classes'], axis=0)
_image_info = np.concatenate(predictions['image_info'], axis=0)
_num_detections = np.concatenate(predictions['num_detections'], axis=0)
_orig_images = np.concatenate(predictions['orig_images'], axis=0)
if 'detection_masks' in predictions:
_detection_masks = np.concatenate(predictions['detection_masks'], axis=0)
else:
_detection_masks = None
if 'groundtruth_boxes' in predictions:
_groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'], axis=0)
else:
_groundtruth_boxes = None
_orig_images = _orig_images * 255
_orig_images = _orig_images.astype(np.uint8)
image_previews = []
for i in range(max_images):
num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))
detection_boxes = _detection_boxes[i][:num_detections]
detection_scores = _detection_scores[i][:num_detections]
detection_classes = _detection_classes[i][:num_detections]
image = _orig_images[i]
image_height = image.shape[0]
image_width = image.shape[1]
# Rescale the box to fit the visualization image.
h, w = _image_info[i][3:5]
detection_boxes = detection_boxes / np.array([w, h, w, h])
detection_boxes = detection_boxes * np.array([image_width, image_height, image_width, image_height])
if _groundtruth_boxes is not None:
gt_boxes = _groundtruth_boxes[i]
gt_boxes = gt_boxes * np.array([image_height, image_width, image_height, image_width])
else:
gt_boxes = None
if _detection_masks is not None:
instance_masks = _detection_masks[i][0:num_detections]
segmentations = coco_metric.generate_segmentation_from_masks(
instance_masks,
detection_boxes,
image_height,
image_width
)
else:
segmentations = None
# Convert boxes from [x, y, w, h] to [x1, y1, x2, y2]. Since
# process_prediction_for_eval() stored them in [x, y] order, they also need
# to be flipped back to the [y, x] order expected by the visualization utils.
xmin, ymin, w, h = np.split(detection_boxes, 4, axis=-1)
xmax = xmin + w
ymax = ymin + h
boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)
image_preview = generate_image_preview(
image,
boxes=boxes_to_visualize,
scores=detection_scores,
classes=detection_classes.astype(np.int32),
gt_boxes=gt_boxes,
segmentations=segmentations
)
image_previews.append(image_preview)
try:
summaries = []
for i, image_preview in enumerate(image_previews):
image_buffer = generate_image_buffer(image_preview)
image_summary = tf.compat.v1.Summary.Image(encoded_image_string=image_buffer)
image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i, image=image_summary)
summaries.append(image_value)
except AttributeError:
image_previews = np.array(image_previews)
summaries = tf.summary.image(
name='image_summary',
data=image_previews,
step=current_step,
max_outputs=max_images
)
return summaries
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mask_rcnn.hooks.ckpt_hook import CheckpointSaverHook
from mask_rcnn.hooks.pretrained_restore_hook import PretrainedWeightsLoadingHook
__all__ = [
"CheckpointSaverHook",
"PretrainedWeightsLoadingHook",
]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tensorflow as tf
from mask_rcnn.utils.logging_formatter import logging
__all__ = ["CheckpointSaverHook"]
class CheckpointSaverHook(tf.estimator.SessionRunHook):
"""Saves checkpoints every N steps or seconds."""
def __init__(self, checkpoint_dir, checkpoint_basename="model.ckpt"):
"""Initializes a `CheckpointSaverHook`.
Args:
checkpoint_dir: `str`, base directory for the checkpoint files.
checkpoint_basename: `str`, base name for the checkpoint files.
Raises:
ValueError: One of `save_steps` or `save_secs` should be set.
ValueError: At most one of `saver` or `scaffold` should be set.
"""
logging.info("Create CheckpointSaverHook.")
self._saver = None
self._checkpoint_dir = checkpoint_dir
self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
self._steps_per_run = 1
self._is_initialized = False
self._global_step_tensor = None
self._summary_writer = None
def _set_steps_per_run(self, steps_per_run):
self._steps_per_run = steps_per_run
def begin(self):
self._global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
self._saver = tf.compat.v1.train.Saver()
from tensorflow.python.training import summary_io
self._summary_writer = summary_io.SummaryWriterCache.get(self._checkpoint_dir)
if self._global_step_tensor is None:
raise RuntimeError(
"Global step should be created to use CheckpointSaverHook."
)
def after_create_session(self, session, coord):
if not self._is_initialized:
global_step = session.run(self._global_step_tensor)
from tensorflow.python.keras.backend import get_graph
default_graph = get_graph()
# We write the graph and saver_def on the first call of after_create_session
# rather than in begin(), because other hooks may still modify the graph and
# add variables during begin(). The graph is finalized after all begin() calls.
tf.io.write_graph(
default_graph.as_graph_def(add_shapes=True),
self._checkpoint_dir,
"graph.pbtxt"
)
saver_def = self._saver.saver_def
from tensorflow.python.framework import meta_graph
meta_graph_def = meta_graph.create_meta_graph_def(
graph_def=default_graph.as_graph_def(add_shapes=True),
saver_def=saver_def
)
self._summary_writer.add_graph(default_graph)
self._summary_writer.add_meta_graph(meta_graph_def)
# The checkpoint saved here is the state at step "global_step".
self._save(session, global_step)
self._is_initialized = True
def end(self, session):
last_step = session.run(self._global_step_tensor)
self._save(session, last_step)
def _save(self, session, step):
"""Saves the latest checkpoint, returns should_stop."""
logging.info("Saving checkpoints for %d into %s.", step, self._save_path)
self._saver.save(session, self._save_path, global_step=step)
self._summary_writer.add_session_log(
tf.compat.v1.SessionLog(status=tf.compat.v1.SessionLog.CHECKPOINT, checkpoint_path=self._save_path),
step
)
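# Usage sketch (hypothetical estimator and checkpoint directory):
#   saver_hook = CheckpointSaverHook(checkpoint_dir='/results/checkpoints')
#   estimator.train(input_fn=train_input_fn, hooks=[saver_hook], max_steps=1000)
# A checkpoint is written right after session creation and once more when the
# session ends.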
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import operator
import time
import numpy as np
import tensorflow as tf
from distutils.version import LooseVersion
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils import meters
from mask_rcnn.utils.decorators import atexit_hook
from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
from mask_rcnn.utils.distributed_utils import MPI_size
from mask_rcnn.utils.logging_backend import LoggingBackend
from mask_rcnn.utils.logging_backend import RuntimeMode
from mask_rcnn.utils.metric_tracking import clear_registered_metrics
from mask_rcnn.utils.metric_tracking import TF_METRICS
from mask_rcnn.utils.metric_tracking import KERAS_MODELS
from mask_rcnn.utils.lazy_imports import LazyImport
hvd = LazyImport("horovod.tensorflow")
__all__ = ["AutoLoggingHook"]
@atexit_hook
class _AutoLoggingHook(tf.estimator.SessionRunHook):
def __init__(self, log_every_n_steps=200, warmup_steps=500, is_training=True):
"""
AutoLogging hook for TensorFlow.
:param log_every_n_steps: a log line is written to the console every N steps
:param warmup_steps: integer, number of steps considered as warmup
:param is_training: boolean
"""
self._logging_proxy = LoggingBackend()
self._initialized = False
self._metrics = copy.copy(TF_METRICS)
self._batch_size_tensor = None
self._AMP_steps_since_last_loss_scale = None
self._AMP_loss_scale_tensor = None
self._current_step = None
self._amp_steps_non_skipped = None
self._warmup_steps = warmup_steps
self._log_every_n_steps = log_every_n_steps
self._step_t0 = None
self._session_t0 = None
self._session_run_times = list()
self._global_step_tensor = None
self._is_training = is_training
self._runtime_mode = RuntimeMode.TRAIN if is_training else RuntimeMode.VALIDATION
self._model_throughput = meters.MovingAverageMeter(window_size=1000)
self._model_stats = None
self._n_gpus = None
def __atexit__(self):
if self._initialized:
total_processing_time = int(np.sum(self._session_run_times))
try:
avg_throughput = self._model_throughput.read()
except ValueError:
avg_throughput = -1
self._logging_proxy.log_summary(
is_train=self._is_training,
total_steps=self._current_step,
total_processing_time=total_processing_time,
avg_throughput=avg_throughput
)
metric_data = dict()
for key, value in self._metrics.items():
try:
metric_data[key] = value["aggregator"].read()
except ValueError:
pass
self._logging_proxy.log_final_metrics(metric_data=metric_data, runtime_mode=self._runtime_mode)
def begin(self):
"""Called once before using the session.
When called, the default graph is the one that will be launched in the
session. The hook can modify the graph by adding new operations to it.
After the `begin()` call the graph will be finalized and the other callbacks
can not modify the graph anymore. Second call of `begin()` on the same
graph, should not change the graph.
"""
from tensorflow.python.keras.backend import get_graph
_graph = get_graph()
try:
self._batch_size_tensor = None
for tensor in _graph.as_graph_def().node:
if "IteratorGetNext" in tensor.name:
_input_tensor = _graph.get_tensor_by_name(tensor.name + ":0")
try:
self._batch_size_tensor = tf.shape(input=_input_tensor)[0]
except TypeError: # Ragged Tensor
self._batch_size_tensor = _input_tensor.bounding_shape()[0]
break
else:
raise RuntimeError(
"Tensor `{}` could not be found. "
"Make sure you are using tf.data API".format("IteratorGetNext")
)
except RuntimeError:
raise
except Exception as e:
raise RuntimeError(
"Impossible to fetch the tensor: `IteratorGetNext`. Make sure you are using tf.data API."
) from e
self._global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
try:
self._AMP_loss_scale_tensor = _graph.get_tensor_by_name("current_loss_scale/Read/ReadVariableOp:0")
# NOTE: this currently fetches the same tensor as the loss scale above; the
# "steps since last loss scale" counter lives under a separate variable in
# TF-AMP graphs.
self._AMP_steps_since_last_loss_scale = _graph.get_tensor_by_name("current_loss_scale/Read/ReadVariableOp:0")
except RuntimeError:
raise
# TF-AMP is not activated
except Exception:
pass
# if self._is_training:
# self.runtime_data["params_count"] = tf.reduce_sum(
# [tf.reduce_prod(v.shape) for v in tf.trainable_variables()]
# )
def end(self, session): # pylint: disable=unused-argument
"""Called at the end of session.
The `session` argument can be used in case the hook wants to run final ops,
such as saving a last checkpoint.
If `session.run()` raises exception other than OutOfRangeError or
StopIteration then `end()` is not called.
Note the difference between `end()` and `after_run()` behavior when
`session.run()` raises OutOfRangeError or StopIteration. In that case
`end()` is called but `after_run()` is not called.
Args:
session: A TensorFlow Session that will be soon closed.
"""
self._session_run_times.append(time.time() - self._session_t0)
def after_create_session(self, session, coord): # pylint: disable=unused-argument
"""Called when new TensorFlow session is created.
This is called to signal the hooks that a new session has been created. This
has two essential differences with the situation in which `begin` is called:
* When this is called, the graph is finalized and ops can no longer be added
to the graph.
* This method will also be called as a result of recovering a wrapped
session, not only at the beginning of the overall session.
Args:
session: A TensorFlow Session that has been created.
coord: A Coordinator object which keeps track of all threads.
"""
# ========= Collect the number of GPUs ======== #
if self._is_training:
if MPI_is_distributed():
self._n_gpus = MPI_size()
elif tf.distribute.has_strategy():
self._n_gpus = tf.distribute.get_strategy().num_replicas_in_sync
else:
self._n_gpus = 1
else:
self._n_gpus = 1
# =========== TensorFlow Hook Setup =========== #
_global_step, _metrics = setup_tensorflow_hook(
sess=session,
logging_proxy=self._logging_proxy,
is_training=self._is_training,
is_initialized=self._initialized
)
if _global_step >= 0:
self._current_step = self._amp_steps_non_skipped = _global_step
self._metrics.update(_metrics)
if not self._is_training:
for metric_name in self._metrics.keys():
self._metrics[metric_name]["aggregator"].reset()
self._initialized = True
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% #
self._session_t0 = time.time()
def before_run(self, run_context): # pylint: disable=unused-argument
"""Called before each call to run().
You can return from this call a `SessionRunArgs` object indicating ops or
tensors to add to the upcoming `run()` call. These ops/tensors will be run
together with the ops/tensors originally passed to the original run() call.
The run args you return can also contain feeds to be added to the run()
call.
The `run_context` argument is a `SessionRunContext` that provides
information about the upcoming `run()` call: the originally requested
op/tensors, the TensorFlow Session.
At this point graph is finalized and you can not add ops.
Args:
run_context: A `SessionRunContext` object.
Returns:
None or a `SessionRunArgs` object.
"""
self._current_step += 1
request_fetches = {
"global_step": self._global_step_tensor, "metrics": dict(), "batch_size": self._batch_size_tensor
}
if self._is_training and self._AMP_steps_since_last_loss_scale is not None:
request_fetches["AMP"] = {
"steps_since_last_loss_scale": self._AMP_steps_since_last_loss_scale,
"current_loss_scale": self._AMP_loss_scale_tensor,
}
if self._current_step % self._log_every_n_steps == 0:
for key, value in self._metrics.items():
request_fetches["metrics"][key] = value["tensor"]
self._step_t0 = time.time()
return tf.estimator.SessionRunArgs(request_fetches)
def after_run(self, run_context, run_values): # pylint: disable=unused-argument
"""Called after each call to run().
The `run_values` argument contains results of requested ops/tensors by
`before_run()`.
The `run_context` argument is the same one send to `before_run` call.
`run_context.request_stop()` can be called to stop the iteration.
If `session.run()` raises any exceptions then `after_run()` is not called.
Args:
run_context: A `SessionRunContext` object.
run_values: A SessionRunValues object.
"""
batch_time = time.time() - self._step_t0
_global_step = run_values.results["global_step"]
if self._is_training and self._AMP_steps_since_last_loss_scale is not None:
try:
AMP_steps_since_last_loss_scale = run_values.results["AMP"]["steps_since_last_loss_scale"]
AMP_loss_scale = run_values.results["AMP"]["current_loss_scale"]
except KeyError:
AMP_steps_since_last_loss_scale = None
AMP_loss_scale = None
if AMP_steps_since_last_loss_scale is not None:
# Step has been skipped
if _global_step != (self._amp_steps_non_skipped + 1):
logging.warning(
"AMP - Training iteration `#{step}` has been skipped and loss rescaled. "
"New Loss Scale: {loss_scale}\n".format(step=self._current_step, loss_scale=AMP_loss_scale)
)
else:
self._amp_steps_non_skipped += 1
if AMP_steps_since_last_loss_scale == 0:
logging.warning(
"AMP - Training iteration `#{step}` - Loss scale has been automatically increased. "
"New Loss Scale: {loss_scale}\n".format(step=self._current_step, loss_scale=AMP_loss_scale)
)
else:
AMP_steps_since_last_loss_scale = None
AMP_loss_scale = None
def get_model_throughput():
gpu_batch_size = run_values.results["batch_size"]
return gpu_batch_size / batch_time * self._n_gpus
# def get_model_stats():
# return get_tf_model_statistics(batch_size=run_values.results["batch_size"], scope_name=None)
#
# if self._model_stats is None:
# self._model_stats = get_model_stats()
is_log_step = self._current_step % self._log_every_n_steps == 0
if is_log_step:
if self._current_step > self._warmup_steps:
try:
model_throughput = self._model_throughput.read()
except ValueError:
model_throughput = get_model_throughput()
else:
model_throughput = get_model_throughput()
self._logging_proxy.log_step(iteration=self._current_step, throughput=model_throughput, gpu_stats=[])
self._logging_proxy.log_amp_runtime(
current_loss_scale=AMP_loss_scale,
steps_non_skipped=_global_step,
steps_since_last_scale=AMP_steps_since_last_loss_scale,
)
metric_data = dict()
for name, value in sorted(run_values.results["metrics"].items(), key=operator.itemgetter(0)):
self._metrics[name]["aggregator"].record(value)
metric_data[name] = self._metrics[name]["aggregator"].read()
self._logging_proxy.log_metrics(
metric_data=metric_data, iteration=self._current_step, runtime_mode=self._runtime_mode
)
print() # Visual Spacing
elif self._current_step > self._warmup_steps:
# Do not store speed for log step due to additional fetches
self._model_throughput.record(get_model_throughput())
class _SlaveGPUsHook(tf.estimator.SessionRunHook):
def after_create_session(self, session, coord):
with logging.temp_verbosity(logging.INFO): # Do not warn user about metric cleaning
clear_registered_metrics()
def real_autologging_hook(*args, **kwargs):
replica_id = tf.distribute.get_replica_context().replica_id_in_sync_group
# Do not set a logging hook for GPUs != 0
if MPI_rank_and_size()[0] != 0 or (isinstance(replica_id, tf.Tensor) and tf.get_static_value(replica_id) != 0):
return _SlaveGPUsHook()
else:
_ = LoggingBackend() # Making sure the backend is defined before any hook due to __atexit__ hook
return _AutoLoggingHook(*args, **kwargs)
def collect_registered_metrics():
if TF_METRICS: # if not empty
metrics = copy.copy(TF_METRICS)
# Do not warn user about metric cleaning
with logging.temp_verbosity(logging.INFO):
clear_registered_metrics()
return metrics
else:
return dict()
def get_model_variables():
"""return model variables: global variables without optimizer's variables"""
return [
# yapf: disable
var for var in tf.compat.v1.global_variables() if (
var.name[-11:] not in "/Momentum:0" and
var.name[-11:] not in "/Adadelta:0" and
var.name[-13:] not in "/Adadelta_1:0" and
var.name[-7:] not in "/Adam:0" and
var.name[-9:] not in "/Adam_1:0" and
var.name[-10:] not in "/Adagrad:0" and
var.name[-10:] not in "/RMSProp:0" and
var.name[-12:] not in "/RMSProp_1:0" and
var.name[-16:] not in "/LARSOptimizer:0"
)
# yapf: enable
]
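# For example (hypothetical variable names), 'resnet50/conv1/kernel/Momentum:0'
# is an optimizer slot variable and is filtered out by the suffix checks above,
# while 'resnet50/conv1/kernel:0' is kept as a model variable.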
def get_trainable_variables():
"""Get a list of trainable TensorFlow variables.
Parameters
----------
train_only : boolean
If True, only get the trainable variables.
Returns
-------
list of Tensor
A list of trainable TensorFlow variables
Examples
--------
"""
if KERAS_MODELS or LooseVersion(tf.__version__) >= LooseVersion("2.0.0"):
logging.warning(
"In TF2.x, only trainable variables created with Keras Models are captured for logging.\n"
"In TF1.x, if any Keras model is defined, only variables created inside Keras models will be logged."
)
var_list = list()
for model in KERAS_MODELS:
var_list.extend(model.trainable_variables)
# Keep only a list of unique variables (remove potential duplicates)
var_list = list(set(var_list))
# clearing the list of Keras Model to avoid memory leaks
KERAS_MODELS.clear()
return [var for var in sorted(var_list, key=lambda v: v.name)]
else:
# return tf.trainable_variables() # deprecated in TF2.x
from tensorflow.python.keras.backend import get_graph
return get_graph().get_collection('trainable_variables')
def setup_tensorflow_hook(sess, logging_proxy, is_training, is_initialized):
global_step = -1
if is_training:
if not is_initialized:
_global_step_tensor = tf.compat.v1.train.get_or_create_global_step()
global_step = sess.run(_global_step_tensor)
trainable_variables = get_trainable_variables()
def count_weights_in_varlist(var_list):
return np.sum([np.prod(s.get_shape()) for s in var_list])
logging_proxy.log_git_status()
logging_proxy.log_model_statistics(
model_statistics={
"# Trainable Weights": "{:,}".format(int(count_weights_in_varlist(trainable_variables))),
"# Model Weights": "{:,}".format(int(count_weights_in_varlist(get_model_variables()))),
}
)
logging_proxy.log_trainable_variables([(var.name, var.get_shape()) for var in trainable_variables])
else:
if not is_initialized:
global_step = 0
metrics = collect_registered_metrics()
logging_proxy.log_runtime(is_train=is_training)
return global_step, metrics
AutoLoggingHook = lambda *args, **kwargs: real_autologging_hook(*args, **kwargs)
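# Usage sketch (hypothetical values): attach the hook to an Estimator so that
# throughput and registered metrics are logged every N steps on rank 0 only.
#   logging_hook = AutoLoggingHook(log_every_n_steps=100, warmup_steps=200, is_training=True)
#   estimator.train(input_fn=train_input_fn, hooks=[logging_hook], max_steps=10000)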
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions to override model parameters from command-line flags."""
from mask_rcnn.hyperparameters import params_dict
ESSENTIAL_FLAGS = ['tpu', 'data_dir', 'model_dir']
def override_params_from_input_flags(params, input_flags):
"""Update params dictionary with input flags.
Args:
params: ParamsDict object containing dictionary of model parameters.
input_flags: All the flags with non-null value of overridden model
parameters.
Returns:
ParamsDict object containing dictionary of model parameters.
"""
if params is None:
raise ValueError('Input dictionary is empty. It is expected to be loaded with default values.')
if not isinstance(params, params_dict.ParamsDict):
raise ValueError('The base parameter set must be a ParamsDict, was: {}'.format(type(params)))
essential_flag_dict = {}
for key in ESSENTIAL_FLAGS:
flag_value = input_flags.get_flag_value(key, None)
if flag_value is None:
raise ValueError('Flag {} must not be None.'.format(key))
else:
essential_flag_dict[key] = flag_value
params_dict.override_params_dict(params, essential_flag_dict, is_strict=False)
normal_flag_dict = get_dictionary_from_flags(params.as_dict(), input_flags)
params_dict.override_params_dict(params, normal_flag_dict, is_strict=False)
return params
def get_dictionary_from_flags(params, input_flags):
"""Generate dictionary from non-null flags.
Args:
params: Python dictionary of model parameters.
input_flags: All the flags with non-null value of overridden model
parameters.
Returns:
Python dict of overriding model parameters.
"""
flag_dict = {}
for k, v in params.items():
if isinstance(v, dict):
d = get_dictionary_from_flags(v, input_flags)
flag_dict[k] = d
else:
try:
flag_value = input_flags.get_flag_value(k, None)
if flag_value is not None:
flag_dict[k] = flag_value
except AttributeError:
flag_dict[k] = v
return flag_dict
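# Usage sketch (hypothetical params and absl flags):
#   from absl import flags
#   params = params_dict.ParamsDict({'tpu': '', 'data_dir': '', 'model_dir': '', 'train_batch_size': 8})
#   params = override_params_from_input_flags(params, flags.FLAGS)
# Every flag in ESSENTIAL_FLAGS must be set; other flags override matching keys
# only when they carry a non-null value.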
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#============================================================================
"""Utils to handle parameters IO."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import yaml
import tensorflow as tf
def save_hparams_to_yaml(hparams, file_path):
with tf.io.gfile.GFile(file_path, 'w') as f:
try:
hparams_val = hparams.values()
except AttributeError:
hparams_val = hparams.__dict__
yaml.dump(hparams_val, f)
def override_hparams(hparams, dict_or_string_or_yaml_file):
"""Override a given hparams using a dict or a string or a JSON file.
Args:
hparams: a HParams object to be overridden.
dict_or_string_or_yaml_file: a Python dict, or a comma-separated string,
or a path to a YAML file specifying the parameters to be overridden.
Returns:
hparams: the overridden HParams object.
Raises:
ValueError: if failed to override the parameters.
"""
if not dict_or_string_or_yaml_file:
return hparams
if isinstance(dict_or_string_or_yaml_file, dict):
for key, val in dict_or_string_or_yaml_file.items():
if key not in hparams:
try: # TF 1.x
hparams.add_hparam(key, val)
except AttributeError: # TF 2.x
try: # Dict
hparams[key] = val
except TypeError: # Namespace
setattr(hparams, key, val)
else:
raise ValueError("Parameter `%s` is already defined" % key)
# hparams.override_from_dict(dict_or_string_or_yaml_file)
elif isinstance(dict_or_string_or_yaml_file, six.string_types):
try:
hparams.parse(dict_or_string_or_yaml_file)
except ValueError as parse_error:
try:
with tf.io.gfile.GFile(dict_or_string_or_yaml_file) as f:
hparams.override_from_dict(yaml.safe_load(f))
except Exception as read_error:
parse_message = ('Failed to parse config string: %s\n' % parse_error)
read_message = ('Failed to parse the YAML file provided: %s' % read_error)
raise ValueError(parse_message + read_message)
else:
raise ValueError('Unknown input type to parse.')
return hparams
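# Usage sketch (hypothetical override values and file path):
#   hparams = override_hparams(hparams, {'train_batch_size': 4})
#   hparams = override_hparams(hparams, 'learning_rate=0.01,momentum=0.9')
#   hparams = override_hparams(hparams, '/path/to/overrides.yaml')
# The dict branch adds new keys (and raises if a key is already defined); the
# string branch is parsed by hparams.parse(), falling back to reading it as a
# YAML file.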