dcuai / dlexamples · Commits

Commit c320b6ef, authored Apr 15, 2022 by zhenyi

    tf2 detection

Parent: 0fc002df
Showing 20 changed files with 3141 additions and 0 deletions (+3141 -0):
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/.gitignore  +3 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/MaskRCNN.iml  +12 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/inspectionProfiles/profiles_settings.xml  +6 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/misc.xml  +4 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/modules.xml  +8 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/1.py  +21 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/Dockerfile  +57 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/LICENSE  +201 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md  +82 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md-org  +637 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/create_coco_tf_record.py  +336 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/download_and_preprocess_coco.sh  +129 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/download_and_process_pretrained_weights.sh  +73 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF1_conv.png  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF2_conv.png  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/__init__.py  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/anchors.py  +289 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/coco_metric.py  +351 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader.py  +466 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader_utils.py  +466 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/.gitignore  (new file, 100644)

```
# Default ignored files
/shelf/
/workspace.xml
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/MaskRCNN.iml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="PLAIN" />
    <option name="myDocStringFormat" value="Plain" />
  </component>
</module>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/inspectionProfiles/profiles_settings.xml  (new file, 100644)

```
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/misc.xml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/modules.xml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/MaskRCNN.iml" filepath="$PROJECT_DIR$/.idea/MaskRCNN.iml" />
    </modules>
  </component>
</project>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/1.py  (new file, 100644)

```
import torch
import torchvision

# Report the installed versions and the visible GPUs.
print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

# Three boxes in (x1, y1, x2, y2) form and their confidence scores;
# boxes 1 and 2 are nearly identical.
a = torch.Tensor([[1, 1, 2, 2], [1, 1, 3.100001, 3], [1, 1, 3.1, 3]])
b = torch.Tensor([0.9, 0.98, 0.980005])

from torchvision.ops import nms

# Non-maximum suppression with an IoU threshold of 0.4.
ccc = nms(a, b, 0.4)
print(ccc)
print(a[ccc])
```
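For reference, this sanity check should keep boxes 2 and 0: boxes 1 and 2 overlap almost completely, so only the higher-scoring box 2 survives, while box 0 overlaps box 2 with an IoU of roughly 0.24, below the 0.4 threshold. The printed index tensor should therefore be [2, 0], followed by those two boxes.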
TensorFlow2x/ComputeVision/Detection/MaskRCNN/Dockerfile  (new file, 100644)

```
#===============================================================================
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================

ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3
FROM ${FROM_IMAGE_NAME}

ENV DEBIAN_FRONTEND=noninteractive

RUN rm -rf /workspace && mkdir -p /workspace
ADD . /workspace
WORKDIR /workspace

RUN apt-get update && \
    apt-get install -y libsm6 libxext6 libxrender-dev python3-tk cmake && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Make sure python and pip points to pip3 and python3
RUN python -m pip install --upgrade pip && \
    pip --no-cache-dir --no-cache install \
        Cython \
        matplotlib \
        opencv-python-headless \
        mpi4py \
        Pillow \
        pytest \
        pyyaml && \
    git clone https://github.com/pybind/pybind11 /opt/pybind11 && \
    cd /opt/pybind11 && cmake . && make install && pip install . && \
    pip --no-cache-dir --no-cache install \
        'git+https://github.com/NVIDIA/cocoapi#egg=pycocotools&subdirectory=PythonAPI' && \
    pip --no-cache-dir --no-cache install \
        'git+https://github.com/NVIDIA/dllogger'

# Update protobuf 3 to 3.3.0
RUN \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip && \
    unzip -u protoc-3.3.0-linux-x86_64.zip -d protoc3 && \
    mv protoc3/bin/* /usr/local/bin/ && \
    mv protoc3/include/* /usr/local/include/
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/LICENSE  (new file, 100644)
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2019 NVIDIA Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md  (new file, 100644)

# Introduction
* Train a Mask R-CNN model with TensorFlow
<br>

# Environment setup
## 1) Install packages
* Install tensorflow1.15 in the rocm3.3 environment
* Install pycocotools
  pip3 install pycocotools -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Update pandas
  pip3 install -U pandas -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Install dllogger
  git clone --recursive https://github.com/NVIDIA/dllogger.git
  python3 setup.py install
<br>

## 2) Data preprocessing (train and val)
```
cd dataset/
git clone http://github.com/tensorflow/models tf-models
cd tf-models/research
wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip
unzip protobuf.zip
./bin/protoc object_detection/protos/*.proto --python_out=.
```
Return to the dataset directory, open create_coco_tf_record.py (vim create_coco_tf_record.py) and comment out lines 310 and 316.
<br>
```
PYTHONPATH="tf-models:tf-models/research" python3 create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir=/path/to/COCO2017/images/train2017 \
--val_image_dir=/path/to/COCO2017/images/val2017 \
--train_object_annotations_file=/path/to/COCO2017/annotations/instances_train2017.json \
--val_object_annotations_file=/path/to/COCO2017/annotations/instances_val2017.json \
--train_caption_annotations_file=/path/to/COCO2017/annotations/captions_train2017.json \
--val_caption_annotations_file=/path/to/COCO2017/annotations/captions_val2017.json \
--output_dir=coco2017_tfrecord
```
This produces the coco2017_tfrecord directory.

## 3) Download the pretrained models
<br>
The resulting weight files are laid out as follows:
```
weights/
>mask-rcnn/1555659850/
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/saved_model.pb
>>variables/
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.data-00000-of-00001
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.index
>resnet/
>>extracted_from_maskrcnn/
>>resnet-nhwc-2018-02-07/
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/checkpoint
>>>model.ckpt-112603/
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.data-00000-of-00001
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.index
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.meta
>>resnet-nhwc-2018-10-14/
```
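The checkpoint files listed above are the same ones fetched by download_and_process_pretrained_weights.sh, which is added in this commit and downloads them with wget -N into mask-rcnn/1555659850/ and resnet/resnet-nhwc-2018-02-07/, so that script can be used to populate this directory tree.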
# Testing
## Single-card training
```
python3 scripts/benchmark_training.py --gpus {1,4,8} --batch_size {2,4}
python3 scripts/benchmark_training.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Multi-card training
```
python3 scripts/benchmark_training.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Inference
```
python3 scripts/benchmark_inference.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
# References
[https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md-org  (new file, 100644; diff collapsed in this view, +637 lines not shown)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/create_coco_tf_record.py  (new file, 100644)

```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.

Example usage:
    python create_coco_tf_record.py --logtostderr \
      --train_image_dir="${TRAIN_IMAGE_DIR}" \
      --val_image_dir="${VAL_IMAGE_DIR}" \
      --test_image_dir="${TEST_IMAGE_DIR}" \
      --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
      --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
      --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
      --output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import hashlib
import io
import json
import multiprocessing
import os

from absl import app
from absl import flags
import numpy as np
import PIL.Image

from pycocotools import mask
from research.object_detection.utils import dataset_util
from research.object_detection.utils import label_map_util
import tensorflow as tf

flags.DEFINE_boolean(
    'include_masks', False, 'Whether to include instance segmentations masks '
    '(PNG encoded) in the result. default: False.')
flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
flags.DEFINE_string('train_object_annotations_file', '', '')
flags.DEFINE_string('val_object_annotations_file', '', '')
flags.DEFINE_string('train_caption_annotations_file', '', '')
flags.DEFINE_string('val_caption_annotations_file', '', '')
flags.DEFINE_string('testdev_annotations_file', '', 'Test-dev annotations JSON file.')
flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)


def create_tf_example(image,
                      bbox_annotations,
                      caption_annotations,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
       u'date_captured', u'flickr_url', u'id']
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
       u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.io.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in bbox_annotations:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  captions = []
  for caption_annotation in caption_annotations:
    captions.append(caption_annotation['caption'].encode('utf8'))

  feature_dict = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/caption': dataset_util.bytes_list_feature(captions),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(category_names),
      'image/object/class/label': dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
      'image/object/area': dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped


def _pool_create_tf_example(args):
  return create_tf_example(*args)


def _load_object_annotations(object_annotations_file):
  with tf.io.gfile.GFile(object_annotations_file, 'r') as fid:
    obj_annotations = json.load(fid)

  images = obj_annotations['images']
  category_index = label_map_util.create_category_index(
      obj_annotations['categories'])

  img_to_obj_annotation = collections.defaultdict(list)
  tf.compat.v1.logging.info('Building bounding box index.')
  for annotation in obj_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_obj_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  for image in images:
    image_id = image['id']
    if image_id not in img_to_obj_annotation:
      missing_annotation_count += 1
  tf.compat.v1.logging.info('%d images are missing bboxes.',
                            missing_annotation_count)
  return images, img_to_obj_annotation, category_index


def _load_caption_annotations(caption_annotations_file):
  with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid:
    caption_annotations = json.load(fid)

  img_to_caption_annotation = collections.defaultdict(list)
  tf.compat.v1.logging.info('Building caption index.')
  for annotation in caption_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_caption_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  images = caption_annotations['images']
  for image in images:
    image_id = image['id']
    if image_id not in img_to_caption_annotation:
      missing_annotation_count += 1
  tf.compat.v1.logging.info('%d images are missing captions.',
                            missing_annotation_count)
  return img_to_caption_annotation


def _create_tf_record_from_coco_annotations(object_annotations_file,
                                            caption_annotations_file,
                                            image_dir, output_path,
                                            include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    object_annotations_file: JSON file containing bounding box annotations.
    caption_annotations_file: JSON file containing caption annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: Number of output files to create.
  """
  tf.compat.v1.logging.info('writing to output path: %s', output_path)
  writers = [
      tf.io.TFRecordWriter(output_path + '-%05d-of-%05d.tfrecord' %
                           (i, num_shards)) for i in range(num_shards)
  ]

  images, img_to_obj_annotation, category_index = (
      _load_object_annotations(object_annotations_file))
  img_to_caption_annotation = (
      _load_caption_annotations(caption_annotations_file))

  pool = multiprocessing.Pool()
  total_num_annotations_skipped = 0
  for idx, (_, tf_example, num_annotations_skipped) in enumerate(
      pool.imap(_pool_create_tf_example,
                [(image, img_to_obj_annotation[image['id']],
                  img_to_caption_annotation[image['id']], image_dir,
                  category_index, include_masks) for image in images])):
    if idx % 100 == 0:
      tf.compat.v1.logging.info('On image %d of %d', idx, len(images))

    total_num_annotations_skipped += num_annotations_skipped
    writers[idx % num_shards].write(tf_example.SerializeToString())

  pool.close()
  pool.join()

  for writer in writers:
    writer.close()

  tf.compat.v1.logging.info('Finished writing, skipped %d annotations.',
                            total_num_annotations_skipped)


def main(_):
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'

  if not tf.io.gfile.isdir(FLAGS.output_dir):
    tf.io.gfile.makedirs(FLAGS.output_dir)

  train_output_path = os.path.join(FLAGS.output_dir, 'train')
  val_output_path = os.path.join(FLAGS.output_dir, 'val')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'test-dev')

  _create_tf_record_from_coco_annotations(
      FLAGS.train_object_annotations_file,
      FLAGS.train_caption_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks,
      num_shards=256)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_object_annotations_file,
      FLAGS.val_caption_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks,
      num_shards=32)


if __name__ == '__main__':
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
  app.run(main)
```
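Not part of this commit, but a minimal sketch of how the shards written by this script can be read back for a quick sanity check. It assumes TensorFlow 2.x, that --output_dir was coco2017_tfrecord as in the README (so the shard name below is hypothetical), and it parses only a subset of the feature keys defined in feature_dict above:

```
import tensorflow as tf

# Hypothetical shard path; adjust to wherever --output_dir pointed.
shard = "coco2017_tfrecord/train-00000-of-00256.tfrecord"

features = {
    "image/encoded": tf.io.FixedLenFeature([], tf.string),
    "image/source_id": tf.io.FixedLenFeature([], tf.string),
    "image/height": tf.io.FixedLenFeature([], tf.int64),
    "image/width": tf.io.FixedLenFeature([], tf.int64),
    "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/xmax": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymax": tf.io.VarLenFeature(tf.float32),
    "image/object/class/label": tf.io.VarLenFeature(tf.int64),
}

# Decode the first two examples and print basic shapes.
for raw in tf.data.TFRecordDataset(shard).take(2):
    ex = tf.io.parse_single_example(raw, features)
    img = tf.io.decode_jpeg(ex["image/encoded"])              # HWC uint8 image
    xmins = tf.sparse.to_dense(ex["image/object/bbox/xmin"])  # normalized coords
    print(ex["image/source_id"].numpy(), img.shape, xmins.shape)
```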
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/download_and_preprocess_coco.sh  (new file, 100644)

```
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Script to download and preprocess the COCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
#  bash download_and_preprocess_coco.sh /data-dir/coco

set -e
set -x

if [ -z "$1" ]; then
  echo "usage download_and_preprocess_coco.sh [data dir]"
  exit
fi

#sudo apt install -y protobuf-compiler python-pil python-lxml\
#  python-pip python-dev git unzip
#pip install Cython git+https://github.com/cocodataset/cocoapi#subdirectory=PythonAPI

echo "Cloning Tensorflow models directory (for conversion utilities)"
if [ ! -e tf-models ]; then
  git clone http://github.com/tensorflow/models tf-models
fi

(cd tf-models/research && protoc object_detection/protos/*.proto --python_out=.)

UNZIP="unzip -nq"

# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)

# Helper function to download and unpack a .zip file.
function download_and_unzip() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f ${FILENAME} ]; then
    echo "Downloading ${FILENAME} to $(pwd)"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  else
    echo "Skipping download of ${FILENAME}"
  fi
  echo "Unzipping ${FILENAME}"
  ${UNZIP} ${FILENAME}
}

cd ${SCRATCH_DIR}

# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"

TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"

VAL_IMAGE_FILE="val2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"

TEST_IMAGE_FILE="test2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"

# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}

TRAIN_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
TRAIN_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2017.json"
VAL_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2017.json"

# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}

TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"

# # Build TFRecords of the image data.
cd "${CURRENT_DIR}"

# Setup packages
touch tf-models/__init__.py
touch tf-models/research/__init__.py

# Run our conversion
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
PYTHONPATH="tf-models:tf-models/research" python $SCRIPT_DIR/create_coco_tf_record.py \
  --logtostderr \
  --include_masks \
  --train_image_dir="${TRAIN_IMAGE_DIR}" \
  --val_image_dir="${VAL_IMAGE_DIR}" \
  --test_image_dir="${TEST_IMAGE_DIR}" \
  --train_object_annotations_file="${TRAIN_OBJ_ANNOTATIONS_FILE}" \
  --val_object_annotations_file="${VAL_OBJ_ANNOTATIONS_FILE}" \
  --train_caption_annotations_file="${TRAIN_CAPTION_ANNOTATIONS_FILE}" \
  --val_caption_annotations_file="${VAL_CAPTION_ANNOTATIONS_FILE}" \
  --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
  --output_dir="${OUTPUT_DIR}"

mv ${SCRATCH_DIR}/annotations/ ${OUTPUT_DIR}
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/download_and_process_pretrained_weights.sh  (new file, 100644)

```
#!/usr/bin/env bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

mkdir -p /model
cd /model

# DOWNLOAD CHECKPOINTS

## Mask RCNN
## ====================== Mask RCNN ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850"
DEST_DIR="mask-rcnn/1555659850"

wget -N ${BASE_URL}/saved_model.pb -P ${DEST_DIR}
wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/variables
wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables

## ====================== resnet-nhwc-2018-02-07 ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07"
DEST_DIR="resnet/resnet-nhwc-2018-02-07"

wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.data-00000-of-00001 -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.index -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}

## ====================== resnet-nhwc-2018-10-14 ====================== ##
#BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
#DEST_DIR="resnet/resnet-nhwc-2018-10-14"
#
#wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}

# VERIFY CHECKPOINTS
echo "Verifying and Processing Checkpoints..."

python pb_to_ckpt.py \
  --frozen_model_filename=mask-rcnn/1555659850/ \
  --output_filename=mask-rcnn/1555659850/ckpt/model.ckpt

python extract_RN50_weights.py \
  --checkpoint_dir=mask-rcnn/1555659850/ckpt/model.ckpt \
  --save_to=resnet/extracted_from_maskrcnn

echo "Generating list of tensors and their shape..."

python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
  > mask-rcnn/1555659850/tensors_and_shape.txt

python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
  > resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt

#python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
#  > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt

python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
  > resnet/extracted_from_maskrcnn/tensors_and_shape.txt

echo "Script Finished with Success"
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF1_conv.png  (new file, 100644, binary image, 60.9 KB)

TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF2_conv.png  (new file, 100644, binary image, 62.9 KB)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/__init__.py  (new file, 100644, empty)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/anchors.py  (new file, 100644)

```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask-RCNN anchor definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import OrderedDict

import numpy as np
import tensorflow as tf

from mask_rcnn.object_detection import argmax_matcher
from mask_rcnn.object_detection import balanced_positive_negative_sampler
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import faster_rcnn_box_coder
from mask_rcnn.object_detection import region_similarity_calculator
from mask_rcnn.object_detection import target_assigner


def _generate_anchor_configs(min_level, max_level, num_scales, aspect_ratios):
  """Generates mapping from output level to a list of anchor configurations.

  A configuration is a tuple of (num_anchors, scale, aspect_ratio).

  Args:
    min_level: integer number of minimum level of the output feature pyramid.
    max_level: integer number of maximum level of the output feature pyramid.
    num_scales: integer number representing intermediate scales added
      on each level. For instances, num_scales=2 adds two additional
      anchor scales [2^0, 2^0.5] on each level.
    aspect_ratios: list of tuples representing the aspect ratio anchors added
      on each level. For instances, aspect_ratios =
      [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.

  Returns:
    anchor_configs: a dictionary with keys as the levels of anchors and
      values as a list of anchor configuration.
  """
  anchor_configs = {}
  for level in range(min_level, max_level + 1):
    anchor_configs[level] = []
    for scale_octave in range(num_scales):
      for aspect in aspect_ratios:
        anchor_configs[level].append(
            (2**level, scale_octave / float(num_scales), aspect))
  return anchor_configs


def _generate_anchor_boxes(image_size, anchor_scale, anchor_configs):
  """Generates multiscale anchor boxes.

  Args:
    image_size: integer number of input image size. The input image has the
      same dimension for width and height. The image_size should be divided by
      the largest feature stride 2^max_level.
    anchor_scale: float number representing the scale of size of the base
      anchor to the feature stride 2^level.
    anchor_configs: a dictionary with keys as the levels of anchors and
      values as a list of anchor configuration.

  Returns:
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels.

  Raises:
    ValueError: input size must be the multiple of largest feature stride.
  """
  boxes_all = []
  for _, configs in anchor_configs.items():
    boxes_level = []
    for config in configs:
      stride, octave_scale, aspect = config
      if image_size[0] % stride != 0 or image_size[1] % stride != 0:
        raise ValueError('input size must be divided by the stride.')
      base_anchor_size = anchor_scale * stride * 2**octave_scale
      anchor_size_x_2 = base_anchor_size * aspect[0] / 2.0
      anchor_size_y_2 = base_anchor_size * aspect[1] / 2.0

      x = np.arange(stride / 2, image_size[1], stride)
      y = np.arange(stride / 2, image_size[0], stride)
      xv, yv = np.meshgrid(x, y)
      xv = xv.reshape(-1)
      yv = yv.reshape(-1)

      boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
                         yv + anchor_size_y_2, xv + anchor_size_x_2))
      boxes = np.swapaxes(boxes, 0, 1)
      boxes_level.append(np.expand_dims(boxes, axis=1))

    # concat anchors on the same level to the reshape NxAx4
    boxes_level = np.concatenate(boxes_level, axis=1)
    boxes_all.append(boxes_level.reshape([-1, 4]))

  anchor_boxes = np.vstack(boxes_all)
  return anchor_boxes


class Anchors(object):
  """Mask-RCNN Anchors class."""

  def __init__(self, min_level, max_level, num_scales, aspect_ratios,
               anchor_scale, image_size):
    """Constructs multiscale Mask-RCNN anchors.

    Args:
      min_level: integer number of minimum level of the output feature pyramid.
      max_level: integer number of maximum level of the output feature pyramid.
      num_scales: integer number representing intermediate scales added
        on each level. For instances, num_scales=2 adds two additional
        anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: list of tuples representing the aspect ratio anchors added
        on each level. For instances, aspect_ratios =
        [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
      anchor_scale: float number representing the scale of size of the base
        anchor to the feature stride 2^level.
      image_size: integer number of input image size. The input image has the
        same dimension for width and height. The image_size should be divided
        by the largest feature stride 2^max_level.
    """
    self.min_level = min_level
    self.max_level = max_level
    self.num_scales = num_scales
    self.aspect_ratios = aspect_ratios
    self.anchor_scale = anchor_scale
    self.image_size = image_size
    self.config = self._generate_configs()
    self.boxes = self._generate_boxes()

  def _generate_configs(self):
    """Generate configurations of anchor boxes."""
    return _generate_anchor_configs(self.min_level, self.max_level,
                                    self.num_scales, self.aspect_ratios)

  def _generate_boxes(self):
    """Generates multiscale anchor boxes."""
    boxes = _generate_anchor_boxes(self.image_size, self.anchor_scale,
                                   self.config)
    boxes = tf.convert_to_tensor(value=boxes, dtype=tf.float32)
    return boxes

  def get_anchors_per_location(self):
    return self.num_scales * len(self.aspect_ratios)

  def get_unpacked_boxes(self):
    return self.unpack_labels(self.boxes)

  def unpack_labels(self, labels):
    """Unpacks an array of labels into multiscales labels."""
    labels_unpacked = OrderedDict()
    count = 0
    for level in range(self.min_level, self.max_level + 1):
      feat_size0 = int(self.image_size[0] / 2**level)
      feat_size1 = int(self.image_size[1] / 2**level)
      steps = feat_size0 * feat_size1 * self.get_anchors_per_location()
      indices = tf.range(count, count + steps)
      count += steps
      labels_unpacked[level] = tf.reshape(
          tf.gather(labels, indices), [feat_size0, feat_size1, -1])
    return labels_unpacked


class AnchorLabeler(object):
  """Labeler for multiscale anchor boxes."""

  def __init__(self,
               anchors,
               num_classes,
               match_threshold=0.7,
               unmatched_threshold=0.3,
               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5):
    """Constructs anchor labeler to assign labels to anchors.

    Args:
      anchors: an instance of class Anchors.
      num_classes: integer number representing number of classes in the dataset.
      match_threshold: a float number between 0 and 1 representing the
        lower-bound threshold to assign positive labels for anchors. An anchor
        with a score over the threshold is labeled positive.
      unmatched_threshold: a float number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
      rpn_batch_size_per_im: a integer number that represents the number of
        sampled anchors per image in the first stage (region proposal network).
      rpn_fg_fraction: a float number between 0 and 1 representing the fraction
        of positive anchors (foreground) in the first stage.
    """
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        match_threshold,
        unmatched_threshold=unmatched_threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    self._target_assigner = target_assigner.TargetAssigner(
        similarity_calc, matcher, box_coder)
    self._anchors = anchors
    self._match_threshold = match_threshold
    self._unmatched_threshold = unmatched_threshold
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction
    self._num_classes = num_classes

  def _get_rpn_samples(self, match_results):
    """Computes anchor labels.

    This function performs subsampling for foreground (fg) and background (bg)
    anchors.

    Args:
      match_results: A integer tensor with shape [N] representing the
        matching results of anchors. (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      score_targets: a integer tensor with the a shape of [N].
        (1) score_targets[i]=1, the anchor is a positive sample.
        (2) score_targets[i]=0, negative. (3) score_targets[i]=-1, the anchor is
        don't care (ignore).
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))
    # indicator includes both positive and negative labels.
    # labels includes only positives labels.
    # positives = indicator & labels.
    # negatives = indicator & !labels.
    # ignore = !indicator.
    indicator = tf.greater(match_results, -2)
    labels = tf.greater(match_results, -1)

    samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
    positive_labels = tf.where(
        tf.logical_and(samples, labels),
        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    negative_labels = tf.where(
        tf.logical_and(samples, tf.logical_not(labels)),
        tf.constant(1, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    ignore_labels = tf.fill(match_results.shape, -1)

    return (ignore_labels + positive_labels + negative_labels,
            positive_labels, negative_labels)

  def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(self._anchors.boxes)

    # cls_targets, cls_weights, box_weights are not used
    _, _, box_targets, _, matches = self._target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)

    # score_targets contains the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Unpack labels.
    score_targets_dict = self._anchors.unpack_labels(score_targets)
    box_targets_dict = self._anchors.unpack_labels(box_targets)

    return score_targets_dict, box_targets_dict
```
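Not part of this commit, but a minimal sketch of how the Anchors class above is typically exercised. The hyperparameters here (levels 2-6, one scale, three aspect ratios, anchor_scale 8, a 1024x1024 input) are common Mask R-CNN defaults and are assumptions rather than values taken from this repo's configs, and the snippet assumes the mask_rcnn.object_detection helpers imported above are on the Python path:

```
from mask_rcnn.anchors import Anchors

# Hypothetical hyperparameters; the repo's own training config may differ.
anchors = Anchors(min_level=2,
                  max_level=6,
                  num_scales=1,
                  aspect_ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                  anchor_scale=8.0,
                  image_size=(1024, 1024))

# All anchors stacked across levels: [total_anchors, 4] in (ymin, xmin, ymax, xmax).
print(anchors.boxes.shape)

# Per-level view: each level is reshaped to [H_l, W_l, anchors_per_location * 4].
for level, boxes in anchors.get_unpacked_boxes().items():
    print(level, boxes.shape)
```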
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/coco_metric.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
COCO API: github.com/cocodataset/cocoapi/
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
atexit
import
copy
import
tempfile
import
numpy
as
np
import
tensorflow
as
tf
from
mask_rcnn.utils.logging_formatter
import
logging
from
pycocotools.coco
import
COCO
from
pycocotools.cocoeval
import
COCOeval
import
pycocotools.mask
as
maskUtils
import
cv2
class
MaskCOCO
(
COCO
):
"""COCO object for mask evaluation.
"""
def
reset
(
self
,
dataset
):
"""Reset the dataset and groundtruth data index in this object.
Args:
dataset: dict of groundtruth data. It should has similar structure as the
COCO groundtruth JSON file. Must contains three keys: {'images',
'annotations', 'categories'}.
'images': list of image information dictionary. Required keys: 'id',
'width' and 'height'.
'annotations': list of dict. Bounding boxes and segmentations related
information. Required keys: {'id', 'image_id', 'category_id', 'bbox',
'iscrowd', 'area', 'segmentation'}.
'categories': list of dict of the category information.
Required key: 'id'.
Refer to http://cocodataset.org/#format-data for more details.
Raises:
AttributeError: If the dataset is empty or not a dict.
"""
assert
dataset
,
'Groundtruth should not be empty.'
assert
isinstance
(
dataset
,
dict
),
'annotation file format {} not supported'
.
format
(
type
(
dataset
))
self
.
anns
,
self
.
cats
,
self
.
imgs
=
dict
(),
dict
(),
dict
()
self
.
dataset
=
copy
.
deepcopy
(
dataset
)
self
.
createIndex
()
    def loadRes(self, detection_results, include_mask, is_image_mask=False):
        """Loads result file and returns a result api object.

        Args:
          detection_results: a dictionary containing prediction results.
          include_mask: a boolean, whether to include mask in detection results.
          is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

        Returns:
          res: result MaskCOCO api object
        """
        res = MaskCOCO()
        res.dataset['images'] = [img for img in self.dataset['images']]

        logging.info('Loading and preparing results...')
        predictions = self.load_predictions(
            detection_results,
            include_mask=include_mask,
            is_image_mask=is_image_mask
        )
        assert isinstance(predictions, list), 'results is not an array of objects'

        if predictions:
            image_ids = [pred['image_id'] for pred in predictions]
            assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
                'Results do not correspond to current coco set'

        if (predictions and 'bbox' in predictions[0] and predictions[0]['bbox']):
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

            for idx, pred in enumerate(predictions):
                bb = pred['bbox']
                x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]

                if 'segmentation' not in pred:
                    pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]

                pred['area'] = bb[2] * bb[3]
                pred['id'] = idx + 1
                pred['iscrowd'] = 0

        elif 'segmentation' in predictions[0]:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

            for idx, pred in enumerate(predictions):
                # Currently only the compressed RLE format is supported as segmentation results.
                pred['area'] = maskUtils.area(pred['segmentation'])

                if 'bbox' not in pred:
                    pred['bbox'] = maskUtils.toBbox(pred['segmentation'])

                pred['id'] = idx + 1
                pred['iscrowd'] = 0

        res.dataset['annotations'] = predictions

        res.createIndex()
        return res
    def load_predictions(self, detection_results, include_mask, is_image_mask=False):
        """Creates a list of prediction dictionaries from detection and mask results.

        Args:
          detection_results: a dictionary containing numpy arrays which correspond
            to prediction results.
          include_mask: a boolean, whether to include mask in detection results.
          is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

        Returns:
          a list of dictionaries holding the different prediction results from the model
          in numpy form.
        """
        predictions = []
        num_detections = detection_results['detection_scores'].size
        current_index = 0

        for i, image_id in enumerate(detection_results['source_id']):

            if include_mask:
                box_coorindates_in_image = detection_results['detection_boxes'][i]
                segments = generate_segmentation_from_masks(
                    detection_results['detection_masks'][i],
                    box_coorindates_in_image,
                    int(detection_results['image_info'][i][3]),
                    int(detection_results['image_info'][i][4]),
                    is_image_mask=is_image_mask
                )

                # Convert the mask to uint8 and then to fortranarray for the RLE encoder.
                encoded_masks = [
                    maskUtils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
                    for instance_mask in segments
                ]

            for box_index in range(int(detection_results['num_detections'][i])):
                if current_index % 1000 == 0:
                    logging.info('{}/{}'.format(current_index, num_detections))

                current_index += 1

                prediction = {
                    'image_id': int(image_id),
                    'bbox': detection_results['detection_boxes'][i][box_index].tolist(),
                    'score': detection_results['detection_scores'][i][box_index],
                    'category_id': int(detection_results['detection_classes'][i][box_index]),
                }

                if include_mask:
                    prediction['segmentation'] = encoded_masks[box_index]

                predictions.append(prediction)

        return predictions
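
# NOTE (illustration only, not part of the original file): a minimal sketch of the
# `detection_results` dictionary that MaskCOCO.load_predictions() above expects.
# Keys and shapes are inferred from the code; the placeholder values and the helper
# name below are assumptions, not canonical settings.
def _example_detection_results(batch_size=1, max_dets=100, mask_size=28):
    """Builds a zero-filled detection_results dict with the assumed layout."""
    return {
        'source_id': np.arange(batch_size),                                 # COCO image ids
        'num_detections': np.full([batch_size], max_dets),                  # valid detections per image
        'detection_boxes': np.zeros([batch_size, max_dets, 4]),             # [x, y, w, h] per box
        'detection_scores': np.zeros([batch_size, max_dets]),
        'detection_classes': np.ones([batch_size, max_dets]),
        'detection_masks': np.zeros([batch_size, max_dets, mask_size, mask_size]),  # only if include_mask
        'image_info': np.tile([1024., 1024., 1., 800., 800.], [batch_size, 1]),     # indices 3/4: original h/w
    }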
def generate_segmentation_from_masks(masks, detected_boxes, image_height, image_width,
                                     is_image_mask=False):
    """Generates segmentation results from instance masks.

    Args:
      masks: a numpy array of shape [N, mask_height, mask_width] representing the
        instance masks w.r.t. the `detected_boxes`.
      detected_boxes: a numpy array of shape [N, 4] representing the reference
        bounding boxes.
      image_height: an integer representing the height of the image.
      image_width: an integer representing the width of the image.
      is_image_mask: bool. True: input masks are whole-image masks. False: input
        masks are bounding-box level masks.

    Returns:
      segms: a numpy array of shape [N, image_height, image_width] representing
        the instance masks *pasted* on the image canvas.
    """

    def expand_boxes(boxes, scale):
        """Expands an array of boxes by a given scale."""
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227
        # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
        # whereas `boxes` here is in [x1, y1, w, h] form.
        w_half = boxes[:, 2] * .5
        h_half = boxes[:, 3] * .5
        x_c = boxes[:, 0] + w_half
        y_c = boxes[:, 1] + h_half

        w_half *= scale
        h_half *= scale

        boxes_exp = np.zeros(boxes.shape)
        boxes_exp[:, 0] = x_c - w_half
        boxes_exp[:, 2] = x_c + w_half
        boxes_exp[:, 1] = y_c - h_half
        boxes_exp[:, 3] = y_c + h_half

        return boxes_exp

    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    _, mask_height, mask_width = masks.shape
    scale = max((mask_width + 2.0) / mask_width, (mask_height + 2.0) / mask_height)

    ref_boxes = expand_boxes(detected_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)

    segms = []
    for mask_ind, mask in enumerate(masks):
        im_mask = np.zeros((image_height, image_width), dtype=np.uint8)

        if is_image_mask:
            # Process whole-image masks.
            im_mask[:, :] = mask[:, :]

        else:
            # Process masks inside bounding boxes.
            padded_mask[1:-1, 1:-1] = mask[:, :]

            ref_box = ref_boxes[mask_ind, :]

            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1

            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > 0.5, dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, image_width)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, image_height)

            im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                             (x_0 - ref_box[0]):(x_1 - ref_box[0])]

        segms.append(im_mask)

    segms = np.array(segms)
    assert masks.shape[0] == segms.shape[0]
    return segms
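
# NOTE (illustration only, not part of the original file): a quick sanity check of
# generate_segmentation_from_masks() under assumed inputs -- one 28x28 box-level mask
# pasted back onto a 100x120 canvas. Shapes and box values follow the docstring above
# and are placeholders.
def _example_paste_masks():
    """Illustrative only: pastes a single instance mask onto the image canvas."""
    masks = np.ones([1, 28, 28], dtype=np.float32)               # [N, mask_h, mask_w]
    boxes = np.array([[10., 20., 40., 30.]], dtype=np.float32)   # [x1, y1, w, h]
    segms = generate_segmentation_from_masks(masks, boxes, image_height=100, image_width=120)
    assert segms.shape == (1, 100, 120)
    return segms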
class EvaluationMetric(object):
    """COCO evaluation metric class."""

    def __init__(self, filename, include_mask):
        """Constructs the COCO evaluation class.

        The class provides the interface to metrics_fn in TPUEstimator. The
        _evaluate() loads a JSON file in COCO annotation format as the
        groundtruths and runs COCO evaluation.

        Args:
          filename: Ground truth JSON file name. If filename is None, use
            groundtruth data passed from the dataloader for evaluation.
          include_mask: boolean to indicate whether or not to include mask eval.
        """
        if filename:
            if filename.startswith('gs://'):
                _, local_val_json = tempfile.mkstemp(suffix='.json')
                tf.io.gfile.remove(local_val_json)

                tf.io.gfile.copy(filename, local_val_json)

                atexit.register(tf.io.gfile.remove, local_val_json)

            else:
                local_val_json = filename

            self.coco_gt = MaskCOCO(local_val_json)

        self.filename = filename
        self.metric_names = [
            'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl',
            'ARmax1', 'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl'
        ]
        self._include_mask = include_mask

        if self._include_mask:
            mask_metric_names = ['mask_' + x for x in self.metric_names]
            self.metric_names.extend(mask_metric_names)

        self._reset()

    def _reset(self):
        """Resets the COCO API object."""
        if self.filename is None and not hasattr(self, 'coco_gt'):
            self.coco_gt = MaskCOCO()

    def predict_metric_fn(self, predictions, is_predict_image_mask=False, groundtruth_data=None):
        """Generates COCO metrics."""
        image_ids = list(set(predictions['source_id']))

        if groundtruth_data is not None:
            self.coco_gt.reset(groundtruth_data)

        coco_dt = self.coco_gt.loadRes(
            predictions, self._include_mask, is_image_mask=is_predict_image_mask
        )

        coco_eval = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_metrics = coco_eval.stats

        if self._include_mask:
            # Create another object for instance segmentation metric evaluation.
            mcoco_eval = COCOeval(self.coco_gt, coco_dt, iouType='segm')
            mcoco_eval.params.imgIds = image_ids
            mcoco_eval.evaluate()
            mcoco_eval.accumulate()
            mcoco_eval.summarize()
            mask_coco_metrics = mcoco_eval.stats

        if self._include_mask:
            metrics = np.hstack((coco_metrics, mask_coco_metrics))
        else:
            metrics = coco_metrics

        # Clean up after evaluation is done.
        self._reset()

        metrics = metrics.astype(np.float32)

        metrics_dict = {}
        for i, name in enumerate(self.metric_names):
            metrics_dict[name] = metrics[i]

        return metrics_dict
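
# NOTE (illustration only, not part of the original file): a hypothetical end-to-end
# call of EvaluationMetric above. The annotation file name is a placeholder, and the
# `predictions` dict is assumed to follow the layout consumed by load_predictions().
def _example_evaluate(predictions, annotations_json='instances_val2017.json'):
    """Illustrative only: runs bbox + mask COCO evaluation on a predictions dict."""
    evaluator = EvaluationMetric(filename=annotations_json, include_mask=True)
    metrics = evaluator.predict_metric_fn(predictions)
    # 'AP' is the box mAP, 'mask_AP' the instance-segmentation mAP.
    return metrics['AP'], metrics['mask_AP']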
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and number of
positive examples to normalize the loss during training.
"""
import functools
import math
import multiprocessing

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
from mask_rcnn.utils.distributed_utils import MPI_rank
from mask_rcnn.utils.distributed_utils import MPI_size

# common functions
from mask_rcnn.dataloader_utils import dataset_parser

from distutils.version import LooseVersion
class InputReader(object):
    """Input reader for dataset."""

    def __init__(
        self,
        file_pattern,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_examples=0,
        use_fake_data=False,
        use_instance_mask=False,
        seed=None
    ):
        self._mode = mode
        self._file_pattern = file_pattern
        self._num_examples = num_examples
        self._use_fake_data = use_fake_data
        self._use_instance_mask = use_instance_mask
        self._seed = seed

    def _create_dataset_parser_fn(self, params):
        """Creates the parser for parsing input data (dictionary)."""
        return functools.partial(
            dataset_parser,
            mode=self._mode,
            params=params,
            use_instance_mask=self._use_instance_mask,
            seed=self._seed
        )
    def __call__(self, params, input_context=None):

        batch_size = params['batch_size'] if 'batch_size' in params else 1

        try:
            seed = params['seed'] if not MPI_is_distributed() else params['seed'] * MPI_rank()
        except (KeyError, TypeError):
            seed = None

        if MPI_is_distributed():
            n_gpus = MPI_size()
        elif input_context is not None:
            n_gpus = input_context.num_input_pipelines
        else:
            n_gpus = 1

        ##################################################

        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)

        if self._mode == tf.estimator.ModeKeys.TRAIN:

            if input_context is not None:
                logging.info("Using Dataset Sharding with TF Distributed")
                _num_shards = input_context.num_input_pipelines
                _shard_idx = input_context.input_pipeline_id

            elif MPI_is_distributed():
                logging.info("Using Dataset Sharding with Horovod")
                _shard_idx, _num_shards = MPI_rank_and_size()

            try:
                dataset = dataset.shard(num_shards=_num_shards, index=_shard_idx)
                dataset = dataset.shuffle(math.ceil(256 / _num_shards))

            except NameError:  # Not a distributed training setup
                pass

        def _prefetch_dataset(filename):
            return tf.data.TFRecordDataset(filename).prefetch(1)

        dataset = dataset.interleave(
            map_func=_prefetch_dataset,
            cycle_length=32,
            block_length=64,
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
        )

        if self._num_examples is not None and self._num_examples > 0:
            logging.info("[*] Limiting the amount of samples to: %d" % self._num_examples)
            dataset = dataset.take(self._num_examples)

        dataset = dataset.cache()

        if self._mode == tf.estimator.ModeKeys.TRAIN:

            dataset = dataset.shuffle(
                buffer_size=4096,
                reshuffle_each_iteration=True,
                seed=seed
            )

            dataset = dataset.repeat()

        # Parse the fetched records to input tensors for the model function.
        dataset = dataset.map(
            map_func=self._create_dataset_parser_fn(params),
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
        )

        dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset which always loops at the
            # first batch. This reduces variance in performance and is useful in
            # testing.
            logging.info("Using Fake Dataset Loop...")
            dataset = dataset.take(1).cache().repeat()

            if self._mode != tf.estimator.ModeKeys.TRAIN:
                dataset = dataset.take(int(5000 / batch_size))

        dataset = dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE,
        )

        if self._mode == tf.estimator.ModeKeys.PREDICT or n_gpus > 1:

            if not tf.distribute.has_strategy():
                dataset = dataset.apply(
                    tf.data.experimental.prefetch_to_device(
                        '/gpu:0',  # With Horovod the local GPU is always 0
                        buffer_size=1,
                    )
                )

        data_options = tf.data.Options()

        data_options.experimental_deterministic = seed is not None
        if LooseVersion(tf.__version__) <= LooseVersion("2.0.0"):
            data_options.experimental_distribute.auto_shard = False
        else:
            data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
            # data_options.experimental_distribute.auto_shard = False

        data_options.experimental_slack = True

        data_options.experimental_threading.max_intra_op_parallelism = 1
        # data_options.experimental_threading.private_threadpool_size = int(multiprocessing.cpu_count() / n_gpus) * 2

        # ================= experimental_optimization ================= #

        data_options.experimental_optimization.apply_default_optimizations = False

        # data_options.experimental_optimization.autotune = True
        data_options.experimental_optimization.filter_fusion = True
        data_options.experimental_optimization.map_and_batch_fusion = True
        data_options.experimental_optimization.map_and_filter_fusion = True
        data_options.experimental_optimization.map_fusion = True
        data_options.experimental_optimization.map_parallelization = True

        map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
        map_vectorization_options.enabled = True
        map_vectorization_options.use_choose_fastest = True

        data_options.experimental_optimization.map_vectorization = map_vectorization_options

        data_options.experimental_optimization.noop_elimination = True
        data_options.experimental_optimization.parallel_batch = True
        data_options.experimental_optimization.shuffle_and_repeat_fusion = True

        # ========== Stats on TF Data =============
        # aggregator = tf.data.experimental.StatsAggregator()
        # data_options.experimental_stats.aggregator = aggregator
        # data_options.experimental_stats.latency_all_edges = True

        dataset = dataset.with_options(data_options)

        return dataset
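
# NOTE (illustration only, not part of the original file): in TF2 eager mode the reader
# above can be consumed directly as a tf.data iterable; `params` is assumed to carry the
# same keys as the params dictionary used in the benchmark block below.
def _example_train_iteration(data_dir, params):
    """Illustrative only: fetches one parsed, batched training example."""
    import os

    train_input = InputReader(
        file_pattern=os.path.join(data_dir, "train*.tfrecord"),
        mode=tf.estimator.ModeKeys.TRAIN,
        use_instance_mask=True
    )
    for features, labels in train_input(params).take(1):
        return features["images"].shape, sorted(labels.keys())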
if __name__ == "__main__":
    '''
    Data Loading Benchmark Usage:

    # Real Data - Training
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=2 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --training

    # Real Data - Inference
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=8 \
        --warmup_steps=200 \
        --benchmark_steps=2000

    # --------------- #

    # Synthetic Data - Training
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=2 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --training \
        --use_synthetic_data

    # Synthetic Data - Inference
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=8 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --use_synthetic_data

    # --------------- #
    '''
    import os
    import time
    import argparse

    import numpy as np

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    logging.set_verbosity(logging.INFO)

    parser = argparse.ArgumentParser(description="MaskRCNN Dataloader Benchmark")

    parser.add_argument(
        '--data_dir',
        required=True,
        type=str,
        help="Directory path which contains the preprocessed COCO dataset in TFRecord format"
    )

    parser.add_argument(
        '--batch_size',
        default=64,
        type=int,
        required=True,
        help="""Batch size used to measure performance."""
    )

    parser.add_argument(
        '--warmup_steps',
        default=200,
        type=int,
        required=True,
        help="""Number of steps considered as warmup and not taken into account for performance measurements."""
    )

    parser.add_argument(
        '--benchmark_steps',
        default=200,
        type=int,
        required=True,
        help="Number of steps used to benchmark dataloading performance. Only used in training."
    )

    parser.add_argument(
        '--seed',
        default=666,
        type=int,
        required=False,
        help="""Reproducibility Seed."""
    )

    parser.add_argument(
        "--training",
        default=False,
        action="store_true",
        help="Benchmark in training mode"
    )

    parser.add_argument(
        "--use_synthetic_data",
        default=False,
        action="store_true",
        help="Use synthetic dataset"
    )

    FLAGS, unknown_args = parser.parse_known_args()

    if len(unknown_args) > 0:

        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)

        raise ValueError("Invalid command line arg(s)")

    BURNIN_STEPS = FLAGS.warmup_steps

    if FLAGS.training:
        TOTAL_STEPS = FLAGS.warmup_steps + FLAGS.benchmark_steps
    else:
        TOTAL_STEPS = int(1e6)  # Wait for end of dataset

    if FLAGS.training:
        input_dataset = InputReader(
            file_pattern=os.path.join(FLAGS.data_dir, "train*.tfrecord"),
            mode=tf.estimator.ModeKeys.TRAIN,
            use_fake_data=FLAGS.use_synthetic_data,
            use_instance_mask=True,
            seed=FLAGS.seed
        )

    else:
        input_dataset = InputReader(
            file_pattern=os.path.join(FLAGS.data_dir, "val*.tfrecord"),
            mode=tf.estimator.ModeKeys.PREDICT,
            num_examples=5000,
            use_fake_data=FLAGS.use_synthetic_data,
            use_instance_mask=True,
            seed=FLAGS.seed
        )

    logging.info("[*] Executing Benchmark in %s mode" % ("training" if FLAGS.training else "inference"))
    logging.info("[*] Benchmark using %s data" % ("synthetic" if FLAGS.use_synthetic_data else "real"))

    time.sleep(1)

    # Build the data input.
    dataset = input_dataset(
        params={
            "anchor_scale": 8.0,
            "aspect_ratios": [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
            "batch_size": FLAGS.batch_size,
            "gt_mask_size": 112,
            "image_size": [1024, 1024],
            "include_groundtruth_in_features": False,
            "augment_input_data": True,
            "max_level": 6,
            "min_level": 2,
            "num_classes": 91,
            "num_scales": 1,
            "rpn_batch_size_per_im": 256,
            "rpn_fg_fraction": 0.5,
            "rpn_min_size": 0.,
            "rpn_nms_threshold": 0.7,
            "rpn_negative_overlap": 0.3,
            "rpn_positive_overlap": 0.7,
            "rpn_post_nms_topn": 1000,
            "rpn_pre_nms_topn": 2000,
            "skip_crowd_during_training": True,
            "use_category": True,
            "visualize_images_summary": False,
        }
    )

    dataset_iterator = dataset.make_initializable_iterator()

    if FLAGS.training:
        X, Y = dataset_iterator.get_next()
    else:
        X = dataset_iterator.get_next()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False

    with tf.device("gpu:0"):

        X_gpu_ops = list()
        Y_gpu_ops = list()

        if FLAGS.training:
            for _, _x in X.items():
                X_gpu_ops.append(tf.identity(_x))

            for _, _y in Y.items():
                Y_gpu_ops.append(tf.identity(_y))

        else:
            for _, _x in X["features"].items():
                X_gpu_ops.append(tf.identity(_x))

        with tf.control_dependencies(X_gpu_ops + Y_gpu_ops):
            input_op = tf.constant(1.0)

    with tf.compat.v1.Session(config=config) as sess:

        sess.run(dataset_iterator.initializer)
        sess.run(tf.compat.v1.global_variables_initializer())

        total_files_processed = 0

        img_per_sec_arr = []
        processing_time_arr = []

        processing_start_time = time.time()

        for step in range(TOTAL_STEPS):

            try:
                start_time = time.time()
                sess.run(input_op)
                elapsed_time = (time.time() - start_time) * 1000

                imgs_per_sec = (FLAGS.batch_size / elapsed_time) * 1000
                total_files_processed += FLAGS.batch_size

                if (step + 1) > BURNIN_STEPS:
                    processing_time_arr.append(elapsed_time)
                    img_per_sec_arr.append(imgs_per_sec)

                if (step + 1) % 20 == 0 or (step + 1) == TOTAL_STEPS:
                    print(
                        "[STEP %04d] # Batch Size: %03d - Time: %03d msecs - Speed: %6d img/s" %
                        (step + 1, FLAGS.batch_size, elapsed_time, imgs_per_sec)
                    )

            except tf.errors.OutOfRangeError:
                break

        processing_time = time.time() - processing_start_time

        avg_processing_speed = np.mean(img_per_sec_arr)

        print("\n###################################################################")
        print("*** Data Loading Performance Metrics ***\n")
        print("\t=> Number of Steps: %d" % (step + 1))
        print("\t=> Batch Size: %d" % FLAGS.batch_size)
        print("\t=> Files Processed: %d" % total_files_processed)
        print("\t=> Total Execution Time: %d secs" % processing_time)
        print("\t=> Median Time per step: %3d msecs" % np.median(processing_time_arr))
        print("\t=> Median Processing Speed: %d images/secs" % np.median(img_per_sec_arr))
        print("\t=> Median Processing Time: %.2f msecs/image" % (1 / float(np.median(img_per_sec_arr)) * 1000))
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader_utils.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and number of
positive examples to normalize the loss during training.
"""
import tensorflow as tf

from mask_rcnn import anchors

from mask_rcnn.utils import coco_utils

from mask_rcnn.ops import preprocess_ops
from mask_rcnn.object_detection import tf_example_decoder

MAX_NUM_INSTANCES = 100
MAX_NUM_VERTICES_PER_INSTANCE = 1500
MAX_NUM_POLYGON_LIST_LEN = 2 * MAX_NUM_VERTICES_PER_INSTANCE * MAX_NUM_INSTANCES
POLYGON_PAD_VALUE = coco_utils.POLYGON_PAD_VALUE

__all__ = [
    # dataset parser
    "dataset_parser",

    # common functions
    "preprocess_image",
    "process_groundtruth_is_crowd",
    "process_source_id",

    # eval
    "prepare_labels_for_eval",

    # training
    "augment_image",
    "process_boxes_classes_indices_for_training",
    "process_gt_masks_for_training",
    "process_labels_for_training",
    "process_targets_for_training"
]
###############################################################################################################


def dataset_parser(value, mode, params, use_instance_mask, seed=None, regenerate_source_id=False):
    """Parses data to a fixed dimension input image and learning targets.

    Args:
      value: A dictionary that contains an image and groundtruth annotations.
      mode: a TF Estimator mode key (TRAIN, EVAL or PREDICT).
      params: a dictionary of dataset and model parameters.
      use_instance_mask: whether to decode and process instance masks.
      seed: an optional seed used for input augmentation.
      regenerate_source_id: `bool`, if True TFExampleParser will use the hashed
        value of `image/encoded` for `image/source_id`.

    Returns:
      features: a dictionary that contains the image and auxiliary
        information. The following describes {key: value} pairs in the
        dictionary.
        image: an image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        image_info: image information that includes the original height and
          width, the scale of the processed image to the original image, and
          the scaled height and width.
        source_ids: Source image id. Default value -1 if the source id is
          empty in the groundtruth annotation.
      labels: a dictionary that contains auxiliary information plus (optional)
        labels. The following describes {key: value} pairs in the dictionary.
        `labels` is only for training.
        score_targets_dict: an ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of the objectness score at the l-th level.
        box_targets_dict: an ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of the bounding box regression output
          at the l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
          fixed dimension [MAX_NUM_INSTANCES, 4].
        gt_classes: Groundtruth class annotations. The tensor is padded
          with -1 to the fixed dimension [MAX_NUM_INSTANCES].
        cropped_gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by params['gt_mask_size']
    """
    if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
        raise ValueError("Unknown execution mode received: %s" % mode)

    def create_example_decoder():
        return tf_example_decoder.TfExampleDecoder(
            use_instance_mask=use_instance_mask,
            regenerate_source_id=regenerate_source_id
        )

    example_decoder = create_example_decoder()

    with tf.xla.experimental.jit_scope(compile_ops=True):

        with tf.name_scope('parser'):

            data = example_decoder.decode(value)

            data['groundtruth_is_crowd'] = process_groundtruth_is_crowd(data)

            image = tf.image.convert_image_dtype(data['image'], dtype=tf.float32)

            source_id = process_source_id(data['source_id'])

            if mode == tf.estimator.ModeKeys.PREDICT:

                features = {
                    'source_ids': source_id,
                }

                if params['visualize_images_summary']:
                    features['orig_images'] = tf.image.resize(image, params['image_size'])

                features["images"], features["image_info"], _, _ = preprocess_image(
                    image,
                    boxes=None,
                    instance_masks=None,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=False,
                    seed=seed
                )

                if params['include_groundtruth_in_features']:
                    labels = prepare_labels_for_eval(
                        data,
                        target_num_instances=MAX_NUM_INSTANCES,
                        target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
                        use_instance_mask=params['include_mask']
                    )
                    return {'features': features, 'labels': labels}

                else:
                    return {'features': features}

            elif mode == tf.estimator.ModeKeys.TRAIN:

                labels = {}
                features = {'source_ids': source_id}

                boxes, classes, indices, instance_masks = process_boxes_classes_indices_for_training(
                    data,
                    skip_crowd_during_training=params['skip_crowd_during_training'],
                    use_category=params['use_category'],
                    use_instance_mask=use_instance_mask
                )

                image, image_info, boxes, instance_masks = preprocess_image(
                    image,
                    boxes=boxes,
                    instance_masks=instance_masks,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=params['augment_input_data'],
                    seed=seed
                )

                features.update({
                    'images': image,
                    'image_info': image_info,
                })

                padded_image_size = image.get_shape().as_list()[:2]

                # Pads cropped_gt_masks.
                if use_instance_mask:
                    labels['cropped_gt_masks'] = process_gt_masks_for_training(
                        instance_masks,
                        boxes,
                        gt_mask_size=params['gt_mask_size'],
                        padded_image_size=padded_image_size,
                        max_num_instances=MAX_NUM_INSTANCES
                    )

                with tf.xla.experimental.jit_scope(compile_ops=False):
                    # Assign anchors.
                    (score_targets, box_targets), input_anchor = process_targets_for_training(
                        padded_image_size=padded_image_size,
                        boxes=boxes,
                        classes=classes,
                        params=params
                    )

                additional_labels = process_labels_for_training(
                    image_info, boxes, classes, score_targets, box_targets,
                    max_num_instances=MAX_NUM_INSTANCES,
                    min_level=params["min_level"],
                    max_level=params["max_level"]
                )

                labels.update(additional_labels)

                # labels["input_anchor"] = input_anchor

                # Features
                # {
                #     'source_ids': <tf.Tensor 'parser/StringToNumber:0' shape=() dtype=float32>,
                #     'images': <tf.Tensor 'parser/pad_to_bounding_box/Squeeze:0' shape=(1024, 1024, 3) dtype=float32>,
                #     'image_info': <tf.Tensor 'parser/stack_1:0' shape=(5,) dtype=float32>
                # }
                FAKE_FEATURES = False

                if FAKE_FEATURES:
                    labels["source_ids"] = tf.ones(shape=(), dtype=tf.float32)
                    labels["images"] = tf.ones(shape=(1024, 1024, 3), dtype=tf.float32)
                    labels["image_info"] = tf.ones(shape=(5,), dtype=tf.float32)

                # Labels
                # {
                #     'cropped_gt_masks': <tf.Tensor 'parser/Reshape_4:0' shape=(100, 116, 116) dtype=float32>,
                #     'score_targets_2': <tf.Tensor 'parser/Reshape_9:0' shape=(256, 256, 3) dtype=int32>,
                #     'box_targets_2': <tf.Tensor 'parser/Reshape_14:0' shape=(256, 256, 12) dtype=float32>,
                #     'score_targets_3': <tf.Tensor 'parser/Reshape_10:0' shape=(128, 128, 3) dtype=int32>,
                #     'box_targets_3': <tf.Tensor 'parser/Reshape_15:0' shape=(128, 128, 12) dtype=float32>,
                #     'score_targets_4': <tf.Tensor 'parser/Reshape_11:0' shape=(64, 64, 3) dtype=int32>,
                #     'box_targets_4': <tf.Tensor 'parser/Reshape_16:0' shape=(64, 64, 12) dtype=float32>,
                #     'score_targets_5': <tf.Tensor 'parser/Reshape_12:0' shape=(32, 32, 3) dtype=int32>,
                #     'box_targets_5': <tf.Tensor 'parser/Reshape_17:0' shape=(32, 32, 12) dtype=float32>,
                #     'score_targets_6': <tf.Tensor 'parser/Reshape_13:0' shape=(16, 16, 3) dtype=int32>,
                #     'box_targets_6': <tf.Tensor 'parser/Reshape_18:0' shape=(16, 16, 12) dtype=float32>,
                #     'gt_boxes': <tf.Tensor 'parser/Reshape_20:0' shape=(100, 4) dtype=float32>,
                #     'gt_classes': <tf.Tensor 'parser/Reshape_22:0' shape=(100, 1) dtype=float32>
                # }
                FAKE_LABELS = False

                if FAKE_LABELS:
                    labels["cropped_gt_masks"] = tf.ones(shape=(100, 116, 116), dtype=tf.float32)
                    labels["gt_boxes"] = tf.ones(shape=(100, 4), dtype=tf.float32)
                    labels["gt_classes"] = tf.ones(shape=(100, 1), dtype=tf.float32)

                    idx = 1
                    for dim in [256, 128, 64, 32, 16]:
                        idx += 1  # Starts at 2
                        labels["score_targets_%d" % idx] = tf.ones(shape=(dim, dim, 3), dtype=tf.float32)
                        labels["box_targets_%d" % idx] = tf.ones(shape=(dim, dim, 12), dtype=tf.float32)

                return features, labels
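
# NOTE (illustration only, not part of the original file): dataset_parser() is meant to
# be mapped over a TFRecordDataset, as InputReader does in mask_rcnn/dataloader.py.
# `params` is assumed to carry the same keys as the benchmark block there, and TF2 eager
# execution is assumed for the final `next(iter(...))`.
def _example_parse_one_record(tfrecord_path, params):
    """Illustrative only: parses a single serialized tf.Example for training."""
    raw = tf.data.TFRecordDataset([tfrecord_path])
    parsed = raw.map(
        lambda value: dataset_parser(
            value, mode=tf.estimator.ModeKeys.TRAIN, params=params, use_instance_mask=True
        )
    )
    return next(iter(parsed))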
###############################################################################################################

# common functions


def preprocess_image(image, boxes, instance_masks, image_size, max_level, augment_input_data=False, seed=None):

    image = preprocess_ops.normalize_image(image)

    if augment_input_data:
        image, boxes, instance_masks = augment_image(
            image=image,
            boxes=boxes,
            instance_masks=instance_masks,
            seed=seed
        )

    # Scaling and padding.
    image, image_info, boxes, instance_masks = preprocess_ops.resize_and_pad(
        image=image,
        target_size=image_size,
        stride=2 ** max_level,
        boxes=boxes,
        masks=instance_masks
    )

    return image, image_info, boxes, instance_masks


def process_groundtruth_is_crowd(data):
    return tf.cond(
        pred=tf.greater(tf.size(input=data['groundtruth_is_crowd']), 0),
        true_fn=lambda: data['groundtruth_is_crowd'],
        false_fn=lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)
    )
# def process_source_id(data):
#     source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id)
#     source_id = tf.strings.to_number(source_id)
#     return source_id


def process_source_id(source_id):
    """Processes source_id to the right format."""
    if source_id.dtype == tf.string:
        source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)

    with tf.control_dependencies([source_id]):
        source_id = tf.cond(
            tf.equal(tf.size(source_id), 0),
            lambda: tf.cast(tf.constant(-1), tf.int64),
            lambda: tf.identity(source_id)
        )

    return source_id
# eval
def prepare_labels_for_eval(
    data,
    target_num_instances=MAX_NUM_INSTANCES,
    target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
    use_instance_mask=False
):
    """Creates the labels dict for infeed from data of a tf.Example."""

    image = data['image']

    height, width = tf.shape(input=image)[:2]

    boxes = data['groundtruth_boxes']

    classes = tf.cast(data['groundtruth_classes'], dtype=tf.float32)

    num_labels = tf.shape(input=classes)[0]

    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [target_num_instances, 4])
    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [target_num_instances, 1])

    is_crowd = tf.cast(data['groundtruth_is_crowd'], dtype=tf.float32)
    is_crowd = preprocess_ops.pad_to_fixed_size(is_crowd, 0, [target_num_instances, 1])

    labels = dict()

    labels['width'] = width
    labels['height'] = height
    labels['groundtruth_boxes'] = boxes
    labels['groundtruth_classes'] = classes
    labels['num_groundtruth_labels'] = num_labels
    labels['groundtruth_is_crowd'] = is_crowd

    if use_instance_mask:
        data['groundtruth_polygons'] = preprocess_ops.pad_to_fixed_size(
            data=data['groundtruth_polygons'],
            pad_value=POLYGON_PAD_VALUE,
            output_shape=[target_polygon_list_len, 1]
        )

        if 'groundtruth_area' in data:
            # Note: padding the groundtruth area from `data`; the original referenced
            # labels['groundtruth_area'], which is not set at this point.
            labels['groundtruth_area'] = preprocess_ops.pad_to_fixed_size(
                data=data['groundtruth_area'],
                pad_value=0,
                output_shape=[target_num_instances, 1]
            )

    return labels
# training
def augment_image(image, boxes, instance_masks, seed):

    flipped_results = preprocess_ops.random_horizontal_flip(
        image,
        boxes=boxes,
        masks=instance_masks,
        seed=seed
    )

    if instance_masks is not None:
        image, boxes, instance_masks = flipped_results

    else:
        image, boxes = flipped_results

    # image = tf.image.random_brightness(image, max_delta=0.1, seed=seed)
    # image = tf.image.random_contrast(image, lower=0.9, upper=1.1, seed=seed)
    # image = tf.image.random_saturation(image, lower=0.9, upper=1.1, seed=seed)
    # image = tf.image.random_jpeg_quality(image, min_jpeg_quality=80, max_jpeg_quality=100, seed=seed)

    return image, boxes, instance_masks
def process_boxes_classes_indices_for_training(data, skip_crowd_during_training, use_category, use_instance_mask):

    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])

    indices = None
    instance_masks = None

    if not use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    if skip_crowd_during_training:
        indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))

        classes = tf.gather_nd(classes, indices)
        boxes = tf.gather_nd(boxes, indices)

        if use_instance_mask:
            instance_masks = tf.gather_nd(data['groundtruth_instance_masks'], indices)

    return boxes, classes, indices, instance_masks
def process_gt_masks_for_training(instance_masks, boxes, gt_mask_size, padded_image_size, max_num_instances):

    cropped_gt_masks = preprocess_ops.crop_gt_masks(
        instance_masks=instance_masks,
        boxes=boxes,
        gt_mask_size=gt_mask_size,
        image_size=padded_image_size
    )

    # cropped_gt_masks = tf.reshape(cropped_gt_masks, [max_num_instances, -1])

    cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
        data=cropped_gt_masks,
        pad_value=-1,
        output_shape=[max_num_instances, (gt_mask_size + 4) ** 2]
    )

    return tf.reshape(cropped_gt_masks, [max_num_instances, gt_mask_size + 4, gt_mask_size + 4])
def process_labels_for_training(
    image_info, boxes, classes, score_targets, box_targets,
    max_num_instances, min_level, max_level
):
    labels = {}

    # Pad groundtruth data.
    # boxes *= image_info[2]
    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [max_num_instances, 4])
    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [max_num_instances, 1])

    for level in range(min_level, max_level + 1):
        labels['score_targets_%d' % level] = score_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]

    labels['gt_boxes'] = boxes
    labels['gt_classes'] = classes

    return labels
def process_targets_for_training(padded_image_size, boxes, classes, params):

    input_anchors = anchors.Anchors(
        params['min_level'],
        params['max_level'],
        params['num_scales'],
        params['aspect_ratios'],
        params['anchor_scale'],
        padded_image_size
    )

    anchor_labeler = anchors.AnchorLabeler(
        input_anchors,
        params['num_classes'],
        params['rpn_positive_overlap'],
        params['rpn_negative_overlap'],
        params['rpn_batch_size_per_im'],
        params['rpn_fg_fraction']
    )

    return anchor_labeler.label_anchors(boxes, classes), input_anchors
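
# NOTE (illustration only, not part of the original file): a minimal sketch of
# process_targets_for_training(). The parameter values mirror the benchmark block in
# mask_rcnn/dataloader.py and are assumptions, not canonical settings; `boxes` and
# `classes` are tensors as produced by the training parser above.
def _example_anchor_targets(boxes, classes):
    """Illustrative only: builds RPN score/box targets for a padded 1024x1024 image."""
    params = {
        'min_level': 2, 'max_level': 6, 'num_scales': 1,
        'aspect_ratios': [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
        'anchor_scale': 8.0, 'num_classes': 91,
        'rpn_positive_overlap': 0.7, 'rpn_negative_overlap': 0.3,
        'rpn_batch_size_per_im': 256, 'rpn_fg_fraction': 0.5,
    }
    (score_targets, box_targets), _ = process_targets_for_training(
        padded_image_size=[1024, 1024], boxes=boxes, classes=classes, params=params
    )
    # One entry per FPN level, keyed min_level..max_level.
    return score_targets, box_targets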