"backend/vscode:/vscode.git/clone" did not exist on "f6efda9e2ff9782bad2aa0294ebef19c244035bc"
Commit c320b6ef authored by zhenyi

tf2 detection

parent 0fc002df
# Default ignored files
/shelf/
/workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/MaskRCNN.iml" filepath="$PROJECT_DIR$/.idea/MaskRCNN.iml" />
</modules>
</component>
</project>
import torch
import torchvision
print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))
# Quick torchvision NMS check: boxes are (x1, y1, x2, y2); nms keeps the
# highest-scoring boxes and suppresses overlaps above the IoU threshold.
boxes = torch.tensor([[1, 1, 2, 2], [1, 1, 3.100001, 3], [1, 1, 3.1, 3]], dtype=torch.float32)
scores = torch.tensor([0.9, 0.98, 0.980005])
from torchvision.ops import nms
keep = nms(boxes, scores, iou_threshold=0.4)
print(keep)         # indices of the kept boxes, sorted by decreasing score
print(boxes[keep])
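# Worked expectation for the inputs above (assuming standard torchvision.ops.nms
# semantics): box 2 (score 0.980005) is kept first; box 1 overlaps it almost
# exactly (IoU ~ 1.0 > 0.4) and is suppressed; box 0 overlaps box 2 with
# IoU = 1.0 / 4.2 ~ 0.24 < 0.4 and is kept, so keep == tensor([2, 0]).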
#===============================================================================
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3
FROM ${FROM_IMAGE_NAME}
ENV DEBIAN_FRONTEND=noninteractive
RUN rm -rf /workspace && mkdir -p /workspace
ADD . /workspace
WORKDIR /workspace
RUN apt-get update && \
apt-get install -y libsm6 libxext6 libxrender-dev python3-tk cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
# Make sure python and pip point to python3 and pip3
RUN python -m pip install --upgrade pip && \
pip install --no-cache-dir \
Cython \
matplotlib \
opencv-python-headless \
mpi4py \
Pillow \
pytest \
pyyaml && \
git clone https://github.com/pybind/pybind11 /opt/pybind11 && \
cd /opt/pybind11 && cmake . && make install && pip install . && \
pip install --no-cache-dir \
'git+https://github.com/NVIDIA/cocoapi#egg=pycocotools&subdirectory=PythonAPI' && \
pip install --no-cache-dir \
'git+https://github.com/NVIDIA/dllogger'
# Update protobuf 3 to 3.3.0
RUN \
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip && \
unzip -u protoc-3.3.0-linux-x86_64.zip -d protoc3 && \
mv protoc3/bin/* /usr/local/bin/ && \
mv protoc3/include/* /usr/local/include/
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2019 NVIDIA Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# Introduction
* Train a Mask R-CNN model with TensorFlow
<br>
# Environment Setup
## 1) Install packages
* Install TensorFlow 1.15 in a ROCm 3.3 environment
* Install pycocotools
pip3 install pycocotools -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Update pandas
pip3 install -U pandas -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Install dllogger (an import sanity check follows this list)
git clone --recursive https://github.com/NVIDIA/dllogger.git
cd dllogger
python3 setup.py install
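A quick way to confirm the packages above import cleanly (a minimal sketch, not specific to this repo):
```
# check_env.py: sanity check for the packages installed above
import tensorflow as tf
import pandas
import pycocotools
import dllogger

print("tensorflow:", tf.__version__)
print("pandas:", pandas.__version__)
print("pycocotools and dllogger imported OK")
```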
<br>
## 2) Data processing (train and val)
```
cd dataset/
git clone http://github.com/tensorflow/models tf-models
cd tf-models/research
wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip
unzip protobuf.zip
./bin/protoc object_detection/protos/*.proto --python_out=.
```
Return to the dataset directory and edit create_coco_tf_record.py:
vim create_coco_tf_record.py
Comment out lines 310 and 316 of that file.
<br>
```
PYTHONPATH="tf-models:tf-models/research" python3 create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir=/path/to/COCO2017/images/train2017 \
--val_image_dir=/path/to/COCO2017/images/val2017 \
--train_object_annotations_file=/path/to/COCO2017/annotations/instances_train2017.json \
--val_object_annotations_file=/path/to/COCO2017/annotations/instances_val2017.json \
--train_caption_annotations_file=/path/to/COCO2017/annotations/captions_train2017.json \
--val_caption_annotations_file=/path/to/COCO2017/annotations/captions_val2017.json \
--output_dir=coco2017_tfrecord
```
This generates the coco2017_tfrecord directory (a quick way to inspect one shard is sketched below).
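The sketch below decodes a single example from one of the generated shards (assumes eager execution, i.e. TF 2.x, or call tf.compat.v1.enable_eager_execution() first under TF 1.15; the feature keys follow create_coco_tf_record.py):
```
# inspect_tfrecord.py: decode a single example from one generated shard
import tensorflow as tf

path = "coco2017_tfrecord/train-00000-of-00256.tfrecord"
features = {
    "image/height": tf.io.FixedLenFeature([], tf.int64),
    "image/width": tf.io.FixedLenFeature([], tf.int64),
    "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32),
    "image/object/class/label": tf.io.VarLenFeature(tf.int64),
}

for raw in tf.data.TFRecordDataset(path).take(1):
    example = tf.io.parse_single_example(raw, features)
    print("height:", int(example["image/height"]))
    print("width:", int(example["image/width"]))
    print("boxes:", int(tf.shape(example["image/object/bbox/xmin"].values)[0]))
```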
## 3) Download pretrained models
<br>
The resulting model file layout is as follows (a small existence check is sketched after the tree):
```
weights/
  mask-rcnn/1555659850/
    https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/saved_model.pb
    variables/
      https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.data-00000-of-00001
      https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.index
  resnet/
    extracted_from_maskrcnn/            (generated later by extract_RN50_weights.py)
    resnet-nhwc-2018-02-07/
      https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/checkpoint
      https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.data-00000-of-00001
      https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.index
      https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.meta
    resnet-nhwc-2018-10-14/             (optional; its download is commented out in the helper script)
```
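The sketch below checks that the downloaded files landed in the expected places (paths mirror the tree above):
```
# check_weights.py: verify the expected checkpoint layout under weights/
import os

expected = [
    "weights/mask-rcnn/1555659850/saved_model.pb",
    "weights/mask-rcnn/1555659850/variables/variables.data-00000-of-00001",
    "weights/mask-rcnn/1555659850/variables/variables.index",
    "weights/resnet/resnet-nhwc-2018-02-07/checkpoint",
    "weights/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603.data-00000-of-00001",
    "weights/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603.index",
    "weights/resnet/resnet-nhwc-2018-02-07/model.ckpt-112603.meta",
]

for path in expected:
    status = "OK" if os.path.exists(path) else "MISSING"
    print(f"{status:8s} {path}")
```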
# Testing
## Single-GPU training
```
python3 scripts/benchmark_training.py --gpus {1,4,8} --batch_size {2,4}
python3 scripts/benchmark_training.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Multi-GPU training
```
python3 scripts/benchmark_training.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Inference
```
python3 scripts/benchmark_inference.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
# References
[https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import hashlib
import io
import json
import multiprocessing
import os
from absl import app
from absl import flags
import numpy as np
import PIL.Image
from pycocotools import mask
from research.object_detection.utils import dataset_util
from research.object_detection.utils import label_map_util
import tensorflow as tf
flags.DEFINE_boolean('include_masks', False,
'Whether to include instance segmentations masks '
'(PNG encoded) in the result. default: False.')
flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
flags.DEFINE_string('train_object_annotations_file', '', '')
flags.DEFINE_string('val_object_annotations_file', '', '')
flags.DEFINE_string('train_caption_annotations_file', '', '')
flags.DEFINE_string('val_caption_annotations_file', '', '')
flags.DEFINE_string('testdev_annotations_file', '',
'Test-dev annotations JSON file.')
flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
FLAGS = flags.FLAGS
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
def create_tf_example(image,
bbox_annotations,
caption_annotations,
image_dir,
category_index,
include_masks=False):
"""Converts image and annotations to a tf.Example proto.
Args:
image: dict with keys:
[u'license', u'file_name', u'coco_url', u'height', u'width',
u'date_captured', u'flickr_url', u'id']
bbox_annotations:
list of dicts with keys:
[u'segmentation', u'area', u'iscrowd', u'image_id',
u'bbox', u'category_id', u'id']
Notice that bounding box coordinates in the official COCO dataset are
given as [x, y, width, height] tuples using absolute coordinates where
x, y represent the top-left (0-indexed) corner. This function converts
to the format expected by the TensorFlow Object Detection API (which is
[ymin, xmin, ymax, xmax] with coordinates normalized relative to image
size).
image_dir: directory containing the image files.
category_index: a dict containing COCO category information keyed
by the 'id' field of each category. See the
label_map_util.create_category_index function.
include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False.
Returns:
example: The converted tf.Example
num_annotations_skipped: Number of (invalid) annotations that were ignored.
Raises:
ValueError: if the image pointed to by image['file_name'] is not a valid JPEG
"""
image_height = image['height']
image_width = image['width']
filename = image['file_name']
image_id = image['id']
full_path = os.path.join(image_dir, filename)
with tf.io.gfile.GFile(full_path, 'rb') as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = PIL.Image.open(encoded_jpg_io)
key = hashlib.sha256(encoded_jpg).hexdigest()
xmin = []
xmax = []
ymin = []
ymax = []
is_crowd = []
category_names = []
category_ids = []
area = []
encoded_mask_png = []
num_annotations_skipped = 0
for object_annotations in bbox_annotations:
(x, y, width, height) = tuple(object_annotations['bbox'])
if width <= 0 or height <= 0:
num_annotations_skipped += 1
continue
if x + width > image_width or y + height > image_height:
num_annotations_skipped += 1
continue
xmin.append(float(x) / image_width)
xmax.append(float(x + width) / image_width)
ymin.append(float(y) / image_height)
ymax.append(float(y + height) / image_height)
is_crowd.append(object_annotations['iscrowd'])
category_id = int(object_annotations['category_id'])
category_ids.append(category_id)
category_names.append(category_index[category_id]['name'].encode('utf8'))
area.append(object_annotations['area'])
if include_masks:
run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
image_height, image_width)
binary_mask = mask.decode(run_len_encoding)
if not object_annotations['iscrowd']:
binary_mask = np.amax(binary_mask, axis=2)
pil_image = PIL.Image.fromarray(binary_mask)
output_io = io.BytesIO()
pil_image.save(output_io, format='PNG')
encoded_mask_png.append(output_io.getvalue())
captions = []
for caption_annotation in caption_annotations:
captions.append(caption_annotation['caption'].encode('utf8'))
feature_dict = {
'image/height':
dataset_util.int64_feature(image_height),
'image/width':
dataset_util.int64_feature(image_width),
'image/filename':
dataset_util.bytes_feature(filename.encode('utf8')),
'image/source_id':
dataset_util.bytes_feature(str(image_id).encode('utf8')),
'image/key/sha256':
dataset_util.bytes_feature(key.encode('utf8')),
'image/encoded':
dataset_util.bytes_feature(encoded_jpg),
'image/caption':
dataset_util.bytes_list_feature(captions),
'image/format':
dataset_util.bytes_feature('jpeg'.encode('utf8')),
'image/object/bbox/xmin':
dataset_util.float_list_feature(xmin),
'image/object/bbox/xmax':
dataset_util.float_list_feature(xmax),
'image/object/bbox/ymin':
dataset_util.float_list_feature(ymin),
'image/object/bbox/ymax':
dataset_util.float_list_feature(ymax),
'image/object/class/text':
dataset_util.bytes_list_feature(category_names),
'image/object/class/label':
dataset_util.int64_list_feature(category_ids),
'image/object/is_crowd':
dataset_util.int64_list_feature(is_crowd),
'image/object/area':
dataset_util.float_list_feature(area),
}
if include_masks:
feature_dict['image/object/mask'] = (
dataset_util.bytes_list_feature(encoded_mask_png))
example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
return key, example, num_annotations_skipped
def _pool_create_tf_example(args):
return create_tf_example(*args)
def _load_object_annotations(object_annotations_file):
with tf.io.gfile.GFile(object_annotations_file, 'r') as fid:
obj_annotations = json.load(fid)
images = obj_annotations['images']
category_index = label_map_util.create_category_index(
obj_annotations['categories'])
img_to_obj_annotation = collections.defaultdict(list)
tf.compat.v1.logging.info('Building bounding box index.')
for annotation in obj_annotations['annotations']:
image_id = annotation['image_id']
img_to_obj_annotation[image_id].append(annotation)
missing_annotation_count = 0
for image in images:
image_id = image['id']
if image_id not in img_to_obj_annotation:
missing_annotation_count += 1
tf.compat.v1.logging.info('%d images are missing bboxes.', missing_annotation_count)
return images, img_to_obj_annotation, category_index
def _load_caption_annotations(caption_annotations_file):
with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid:
caption_annotations = json.load(fid)
img_to_caption_annotation = collections.defaultdict(list)
tf.compat.v1.logging.info('Building caption index.')
for annotation in caption_annotations['annotations']:
image_id = annotation['image_id']
img_to_caption_annotation[image_id].append(annotation)
missing_annotation_count = 0
images = caption_annotations['images']
for image in images:
image_id = image['id']
if image_id not in img_to_caption_annotation:
missing_annotation_count += 1
tf.compat.v1.logging.info('%d images are missing captions.', missing_annotation_count)
return img_to_caption_annotation
def _create_tf_record_from_coco_annotations(
object_annotations_file,
caption_annotations_file,
image_dir, output_path, include_masks, num_shards):
"""Loads COCO annotation json files and converts to tf.Record format.
Args:
object_annotations_file: JSON file containing bounding box annotations.
caption_annotations_file: JSON file containing caption annotations.
image_dir: Directory containing the image files.
output_path: Path to output tf.Record file.
include_masks: Whether to include instance segmentations masks
(PNG encoded) in the result. default: False.
num_shards: Number of output files to create.
"""
tf.compat.v1.logging.info('writing to output path: %s', output_path)
writers = [
tf.io.TFRecordWriter(output_path + '-%05d-of-%05d.tfrecord' %
(i, num_shards)) for i in range(num_shards)
]
images, img_to_obj_annotation, category_index = (
_load_object_annotations(object_annotations_file))
img_to_caption_annotation = (
_load_caption_annotations(caption_annotations_file))
pool = multiprocessing.Pool()
total_num_annotations_skipped = 0
for idx, (_, tf_example, num_annotations_skipped) in enumerate(
pool.imap(_pool_create_tf_example,
[(image,
img_to_obj_annotation[image['id']],
img_to_caption_annotation[image['id']],
image_dir,
category_index,
include_masks)
for image in images])):
if idx % 100 == 0:
tf.compat.v1.logging.info('On image %d of %d', idx, len(images))
total_num_annotations_skipped += num_annotations_skipped
writers[idx % num_shards].write(tf_example.SerializeToString())
pool.close()
pool.join()
for writer in writers:
writer.close()
tf.compat.v1.logging.info('Finished writing, skipped %d annotations.',
total_num_annotations_skipped)
def main(_):
assert FLAGS.train_image_dir, '`train_image_dir` missing.'
assert FLAGS.val_image_dir, '`val_image_dir` missing.'
assert FLAGS.test_image_dir, '`test_image_dir` missing.'
if not tf.io.gfile.isdir(FLAGS.output_dir):
tf.io.gfile.makedirs(FLAGS.output_dir)
train_output_path = os.path.join(FLAGS.output_dir, 'train')
val_output_path = os.path.join(FLAGS.output_dir, 'val')
testdev_output_path = os.path.join(FLAGS.output_dir, 'test-dev')
_create_tf_record_from_coco_annotations(
FLAGS.train_object_annotations_file,
FLAGS.train_caption_annotations_file,
FLAGS.train_image_dir,
train_output_path,
FLAGS.include_masks,
num_shards=256)
_create_tf_record_from_coco_annotations(
FLAGS.val_object_annotations_file,
FLAGS.val_caption_annotations_file,
FLAGS.val_image_dir,
val_output_path,
FLAGS.include_masks,
num_shards=32)
if __name__ == '__main__':
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
app.run(main)
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Script to download and preprocess the COCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
# bash download_and_preprocess_coco.sh /data-dir/coco
set -e
set -x
if [ -z "$1" ]; then
echo "usage download_and_preprocess_coco.sh [data dir]"
exit
fi
#sudo apt install -y protobuf-compiler python-pil python-lxml\
# python-pip python-dev git unzip
#pip install Cython git+https://github.com/cocodataset/cocoapi#subdirectory=PythonAPI
echo "Cloning Tensorflow models directory (for conversion utilities)"
if [ ! -e tf-models ]; then
git clone http://github.com/tensorflow/models tf-models
fi
(cd tf-models/research && protoc object_detection/protos/*.proto --python_out=.)
UNZIP="unzip -nq"
# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)
# Helper function to download and unpack a .zip file.
function download_and_unzip() {
local BASE_URL=${1}
local FILENAME=${2}
if [ ! -f ${FILENAME} ]; then
echo "Downloading ${FILENAME} to $(pwd)"
wget -nd -c "${BASE_URL}/${FILENAME}"
else
echo "Skipping download of ${FILENAME}"
fi
echo "Unzipping ${FILENAME}"
${UNZIP} ${FILENAME}
}
cd ${SCRATCH_DIR}
# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"
TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"
VAL_IMAGE_FILE="val2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"
TEST_IMAGE_FILE="test2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"
# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}
TRAIN_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
TRAIN_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2017.json"
VAL_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2017.json"
# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}
TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"
# Build TFRecords of the image data.
cd "${CURRENT_DIR}"
# Setup packages
touch tf-models/__init__.py
touch tf-models/research/__init__.py
# Run our conversion
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
PYTHONPATH="tf-models:tf-models/research" python $SCRIPT_DIR/create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_object_annotations_file="${TRAIN_OBJ_ANNOTATIONS_FILE}" \
--val_object_annotations_file="${VAL_OBJ_ANNOTATIONS_FILE}" \
--train_caption_annotations_file="${TRAIN_CAPTION_ANNOTATIONS_FILE}" \
--val_caption_annotations_file="${VAL_CAPTION_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
mv ${SCRATCH_DIR}/annotations/ ${OUTPUT_DIR}
#!/usr/bin/env bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
mkdir -p /model
cd /model
# DOWNLOAD CHECKPOINTS
## Mask RCNN
## ====================== Mask RCNN ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850"
DEST_DIR="mask-rcnn/1555659850"
wget -N ${BASE_URL}/saved_model.pb -P ${DEST_DIR}
wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/variables
wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables
## ====================== resnet-nhwc-2018-02-07 ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07"
DEST_DIR="resnet/resnet-nhwc-2018-02-07"
wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.data-00000-of-00001 -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.index -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}
## ====================== resnet-nhwc-2018-10-14 ====================== ##
#BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
#DEST_DIR="resnet/resnet-nhwc-2018-10-14"
#
#wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}
# VERIFY CHECKPOINTS
echo "Verifying and Processing Checkpoints..."
python pb_to_ckpt.py \
--frozen_model_filename=mask-rcnn/1555659850/ \
--output_filename=mask-rcnn/1555659850/ckpt/model.ckpt
python extract_RN50_weights.py \
--checkpoint_dir=mask-rcnn/1555659850/ckpt/model.ckpt \
--save_to=resnet/extracted_from_maskrcnn
echo "Generating list of tensors and their shape..."
python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
> mask-rcnn/1555659850/tensors_and_shape.txt
python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
> resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt
#python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
# > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt
python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
> resnet/extracted_from_maskrcnn/tensors_and_shape.txt
echo "Script Finished with Success"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask-RCNN anchor definition."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import numpy as np
import tensorflow as tf
from mask_rcnn.object_detection import argmax_matcher
from mask_rcnn.object_detection import balanced_positive_negative_sampler
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import faster_rcnn_box_coder
from mask_rcnn.object_detection import region_similarity_calculator
from mask_rcnn.object_detection import target_assigner
def _generate_anchor_configs(min_level, max_level, num_scales, aspect_ratios):
"""Generates mapping from output level to a list of anchor configurations.
A configuration is a tuple of (stride, octave_scale, aspect_ratio).
Args:
min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added
on each level. For instance, num_scales=2 adds two additional
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of tuples representing the aspect ratio anchors added
on each level. For instance, aspect_ratios =
[(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
Returns:
anchor_configs: a dictionary with keys as the levels of anchors and
values as a list of anchor configuration.
"""
anchor_configs = {}
for level in range(min_level, max_level + 1):
anchor_configs[level] = []
for scale_octave in range(num_scales):
for aspect in aspect_ratios:
anchor_configs[level].append(
(2**level, scale_octave / float(num_scales), aspect))
return anchor_configs
def _generate_anchor_boxes(image_size, anchor_scale, anchor_configs):
"""Generates multiscale anchor boxes.
Args:
image_size: a (height, width) pair giving the input image size. Both
dimensions should be divisible by the largest feature stride
2^max_level.
anchor_scale: float number representing the scale of size of the base
anchor to the feature stride 2^level.
anchor_configs: a dictionary with keys as the levels of anchors and
values as a list of anchor configuration.
Returns:
anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
feature levels.
Raises:
ValueError: if the input size is not divisible by the largest feature stride.
"""
boxes_all = []
for _, configs in anchor_configs.items():
boxes_level = []
for config in configs:
stride, octave_scale, aspect = config
if image_size[0] % stride != 0 or image_size[1] % stride != 0:
raise ValueError('input size must be divisible by the stride.')
base_anchor_size = anchor_scale * stride * 2**octave_scale
anchor_size_x_2 = base_anchor_size * aspect[0] / 2.0
anchor_size_y_2 = base_anchor_size * aspect[1] / 2.0
x = np.arange(stride / 2, image_size[1], stride)
y = np.arange(stride / 2, image_size[0], stride)
xv, yv = np.meshgrid(x, y)
xv = xv.reshape(-1)
yv = yv.reshape(-1)
boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
yv + anchor_size_y_2, xv + anchor_size_x_2))
boxes = np.swapaxes(boxes, 0, 1)
boxes_level.append(np.expand_dims(boxes, axis=1))
# concat anchors on the same level into shape [N, A, 4], then flatten to [N*A, 4]
boxes_level = np.concatenate(boxes_level, axis=1)
boxes_all.append(boxes_level.reshape([-1, 4]))
anchor_boxes = np.vstack(boxes_all)
return anchor_boxes
class Anchors(object):
"""Mask-RCNN Anchors class."""
def __init__(self, min_level, max_level, num_scales, aspect_ratios, anchor_scale, image_size):
"""Constructs multiscale Mask-RCNN anchors.
Args:
min_level: integer number of minimum level of the output feature pyramid.
max_level: integer number of maximum level of the output feature pyramid.
num_scales: integer number representing intermediate scales added
on each level. For instance, num_scales=2 adds two additional
anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of tuples representing the aspect ratio anchors added
on each level. For instance, aspect_ratios =
[(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
anchor_scale: float number representing the scale of size of the base
anchor to the feature stride 2^level.
image_size: a (height, width) pair giving the input image size. Both
dimensions should be divisible by the largest feature stride
2^max_level.
"""
self.min_level = min_level
self.max_level = max_level
self.num_scales = num_scales
self.aspect_ratios = aspect_ratios
self.anchor_scale = anchor_scale
self.image_size = image_size
self.config = self._generate_configs()
self.boxes = self._generate_boxes()
def _generate_configs(self):
"""Generate configurations of anchor boxes."""
return _generate_anchor_configs(self.min_level, self.max_level,
self.num_scales, self.aspect_ratios)
def _generate_boxes(self):
"""Generates multiscale anchor boxes."""
boxes = _generate_anchor_boxes(self.image_size, self.anchor_scale,
self.config)
boxes = tf.convert_to_tensor(value=boxes, dtype=tf.float32)
return boxes
def get_anchors_per_location(self):
return self.num_scales * len(self.aspect_ratios)
def get_unpacked_boxes(self):
return self.unpack_labels(self.boxes)
def unpack_labels(self, labels):
"""Unpacks an array of labels into multiscales labels."""
labels_unpacked = OrderedDict()
count = 0
for level in range(self.min_level, self.max_level + 1):
feat_size0 = int(self.image_size[0] / 2**level)
feat_size1 = int(self.image_size[1] / 2**level)
steps = feat_size0 * feat_size1 * self.get_anchors_per_location()
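# e.g. (assumed) image_size=(1024, 1024), level=2 and 3 anchors per location
# -> 256 * 256 * 3 = 196608 rows taken from `labels` for this level.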
indices = tf.range(count, count + steps)
count += steps
labels_unpacked[level] = tf.reshape(
tf.gather(labels, indices), [feat_size0, feat_size1, -1])
return labels_unpacked
class AnchorLabeler(object):
"""Labeler for multiscale anchor boxes."""
def __init__(self, anchors, num_classes, match_threshold=0.7,
unmatched_threshold=0.3, rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5):
"""Constructs anchor labeler to assign labels to anchors.
Args:
anchors: an instance of class Anchors.
num_classes: integer number representing number of classes in the dataset.
match_threshold: a float number between 0 and 1 representing the
lower-bound threshold to assign positive labels for anchors. An anchor
with a score over the threshold is labeled positive.
unmatched_threshold: a float number between 0 and 1 representing the
upper-bound threshold to assign negative labels for anchors. An anchor
with a score below the threshold is labeled negative.
rpn_batch_size_per_im: an integer number that represents the number of
sampled anchors per image in the first stage (region proposal network).
rpn_fg_fraction: a float number between 0 and 1 representing the fraction
of positive anchors (foreground) in the first stage.
"""
similarity_calc = region_similarity_calculator.IouSimilarity()
matcher = argmax_matcher.ArgMaxMatcher(
match_threshold,
unmatched_threshold=unmatched_threshold,
negatives_lower_than_unmatched=True,
force_match_for_each_row=True)
box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
self._target_assigner = target_assigner.TargetAssigner(
similarity_calc, matcher, box_coder)
self._anchors = anchors
self._match_threshold = match_threshold
self._unmatched_threshold = unmatched_threshold
self._rpn_batch_size_per_im = rpn_batch_size_per_im
self._rpn_fg_fraction = rpn_fg_fraction
self._num_classes = num_classes
def _get_rpn_samples(self, match_results):
"""Computes anchor labels.
This function performs subsampling for foreground (fg) and background (bg)
anchors.
Args:
match_results: An integer tensor with shape [N] representing the
matching results of anchors. (1) match_results[i]>=0,
meaning that column i is matched with row match_results[i].
(2) match_results[i]=-1, meaning that column i is not matched.
(3) match_results[i]=-2, meaning that column i is ignored.
Returns:
score_targets: an integer tensor with shape [N].
(1) score_targets[i]=1: the anchor is a positive sample.
(2) score_targets[i]=0: the anchor is a negative sample.
(3) score_targets[i]=-1: the anchor is ignored (don't care).
"""
sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
positive_fraction=self._rpn_fg_fraction, is_static=False))
# indicator includes both positive and negative labels.
# labels includes only positives labels.
# positives = indicator & labels.
# negatives = indicator & !labels.
# ignore = !indicator.
indicator = tf.greater(match_results, -2)
labels = tf.greater(match_results, -1)
samples = sampler.subsample(
indicator, self._rpn_batch_size_per_im, labels)
positive_labels = tf.where(
tf.logical_and(samples, labels),
tf.constant(2, dtype=tf.int32, shape=match_results.shape),
tf.constant(0, dtype=tf.int32, shape=match_results.shape))
negative_labels = tf.where(
tf.logical_and(samples, tf.logical_not(labels)),
tf.constant(1, dtype=tf.int32, shape=match_results.shape),
tf.constant(0, dtype=tf.int32, shape=match_results.shape))
ignore_labels = tf.fill(match_results.shape, -1)
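# The sum below works because positives are encoded as 2, negatives as 1 and
# the fill value is -1: sampled positives become 1, sampled negatives become 0,
# and everything else stays -1, matching the score_targets docstring above.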
return (ignore_labels + positive_labels + negative_labels,
positive_labels, negative_labels)
def label_anchors(self, gt_boxes, gt_labels):
"""Labels anchors with ground truth inputs.
Args:
gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
For each row, it stores [y0, x0, y1, x1] for four corners of a box.
gt_labels: An integer tensor with shape [N, 1] representing groundtruth
classes.
Returns:
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l
represent the dimension of class logits at l-th level.
box_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
"""
gt_box_list = box_list.BoxList(gt_boxes)
anchor_box_list = box_list.BoxList(self._anchors.boxes)
# cls_targets, cls_weights, box_weights are not used
_, _, box_targets, _, matches = self._target_assigner.assign(
anchor_box_list, gt_box_list, gt_labels)
# score_targets contains the subsampled positive and negative anchors.
score_targets, _, _ = self._get_rpn_samples(matches.match_results)
# Unpack labels.
score_targets_dict = self._anchors.unpack_labels(score_targets)
box_targets_dict = self._anchors.unpack_labels(box_targets)
return score_targets_dict, box_targets_dict
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
COCO API: github.com/cocodataset/cocoapi/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import atexit
import copy
import tempfile
import numpy as np
import tensorflow as tf
from mask_rcnn.utils.logging_formatter import logging
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import pycocotools.mask as maskUtils
import cv2
class MaskCOCO(COCO):
"""COCO object for mask evaluation.
"""
def reset(self, dataset):
"""Reset the dataset and groundtruth data index in this object.
Args:
dataset: dict of groundtruth data. It should have a similar structure to
the COCO groundtruth JSON file. It must contain three keys: {'images',
'annotations', 'categories'}.
'images': list of image information dictionary. Required keys: 'id',
'width' and 'height'.
'annotations': list of dict. Bounding boxes and segmentations related
information. Required keys: {'id', 'image_id', 'category_id', 'bbox',
'iscrowd', 'area', 'segmentation'}.
'categories': list of dict of the category information.
Required key: 'id'.
Refer to http://cocodataset.org/#format-data for more details.
Raises:
AttributeError: If the dataset is empty or not a dict.
"""
assert dataset, 'Groundtruth should not be empty.'
assert isinstance(dataset,
dict), 'annotation file format {} not supported'.format(
type(dataset))
self.anns, self.cats, self.imgs = dict(), dict(), dict()
self.dataset = copy.deepcopy(dataset)
self.createIndex()
def loadRes(self, detection_results, include_mask, is_image_mask=False):
"""Load result file and return a result api object.
Args:
detection_results: a dictionary containing predictions results.
include_mask: a boolean, whether to include mask in detection results.
is_image_mask: a boolean, whether the predicted mask is a whole-image mask.
Returns:
res: result MaskCOCO api object
"""
res = MaskCOCO()
res.dataset['images'] = [img for img in self.dataset['images']]
logging.info('Loading and preparing results...')
predictions = self.load_predictions(
detection_results,
include_mask=include_mask,
is_image_mask=is_image_mask)
assert isinstance(predictions, list), 'results is not an array of objects'
if predictions:
image_ids = [pred['image_id'] for pred in predictions]
assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if (predictions and 'bbox' in predictions[0] and predictions[0]['bbox']):
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for idx, pred in enumerate(predictions):
bb = pred['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in pred:
pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
pred['area'] = bb[2] * bb[3]
pred['id'] = idx + 1
pred['iscrowd'] = 0
elif 'segmentation' in predictions[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for idx, pred in enumerate(predictions):
# now only support compressed RLE format as segmentation results
pred['area'] = maskUtils.area(pred['segmentation'])
if 'bbox' not in pred:
pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
pred['id'] = idx + 1
pred['iscrowd'] = 0
res.dataset['annotations'] = predictions
res.createIndex()
return res
def load_predictions(self,
detection_results,
include_mask,
is_image_mask=False):
"""Create prediction dictionary list from detection and mask results.
Args:
detection_results: a dictionary containing numpy arrays that correspond
to the prediction results.
include_mask: a boolean, whether to include mask in detection results.
is_image_mask: a boolean, whether the predicted mask is a whole-image mask.
Returns:
a list of dictionaries containing the prediction results from the model,
in numpy form.
"""
predictions = []
num_detections = detection_results['detection_scores'].size
current_index = 0
for i, image_id in enumerate(detection_results['source_id']):
if include_mask:
box_coordinates_in_image = detection_results['detection_boxes'][i]
segments = generate_segmentation_from_masks(
detection_results['detection_masks'][i],
box_coordinates_in_image,
int(detection_results['image_info'][i][3]),
int(detection_results['image_info'][i][4]),
is_image_mask=is_image_mask
)
# Convert the mask to uint8 and then to fortranarray for RLE encoder.
encoded_masks = [
maskUtils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
for instance_mask in segments
]
for box_index in range(int(detection_results['num_detections'][i])):
if current_index % 1000 == 0:
logging.info('{}/{}'.format(current_index, num_detections))
current_index += 1
prediction = {
'image_id': int(image_id),
'bbox': detection_results['detection_boxes'][i][box_index].tolist(),
'score': detection_results['detection_scores'][i][box_index],
'category_id': int(
detection_results['detection_classes'][i][box_index]),
}
if include_mask:
prediction['segmentation'] = encoded_masks[box_index]
predictions.append(prediction)
return predictions
def generate_segmentation_from_masks(masks,
detected_boxes,
image_height,
image_width,
is_image_mask=False):
"""Generates segmentation result from instance masks.
Args:
masks: a numpy array of shape [N, mask_height, mask_width] representing the
instance masks w.r.t. the `detected_boxes`.
detected_boxes: a numpy array of shape [N, 4] representing the reference
bounding boxes.
image_height: an integer representing the height of the image.
image_width: an integer representing the width of the image.
is_image_mask: bool. True: input masks are whole-image masks. False: input
masks are bounding-box level masks.
Returns:
segms: a numpy array of shape [N, image_height, image_width] representing
the instance masks *pasted* on the image canvas.
"""
def expand_boxes(boxes, scale):
"""Expands an array of boxes by a given scale."""
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227
# The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
# whereas `boxes` here is in [x1, y1, w, h] form
w_half = boxes[:, 2] * .5
h_half = boxes[:, 3] * .5
x_c = boxes[:, 0] + w_half
y_c = boxes[:, 1] + h_half
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812
# To work around an issue with cv2.resize (it seems to automatically pad
# with repeated border values), we manually zero-pad the masks by 1 pixel
# prior to resizing back to the original image resolution. This prevents
# "top hat" artifacts. We therefore need to expand the reference boxes by an
# appropriate factor.
_, mask_height, mask_width = masks.shape
scale = max((mask_width + 2.0) / mask_width,
(mask_height + 2.0) / mask_height)
ref_boxes = expand_boxes(detected_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
segms = []
for mask_ind, mask in enumerate(masks):
im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
if is_image_mask:
# Process whole-image masks.
im_mask[:, :] = mask[:, :]
else:
# Process mask inside bounding boxes.
padded_mask[1:-1, 1:-1] = mask[:, :]
ref_box = ref_boxes[mask_ind, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > 0.5, dtype=np.uint8)
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, image_width)
y_0 = max(ref_box[1], 0)
y_1 = min(ref_box[3] + 1, image_height)
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]), (
x_0 - ref_box[0]):(x_1 - ref_box[0])]
segms.append(im_mask)
segms = np.array(segms)
assert masks.shape[0] == segms.shape[0]
return segms
class EvaluationMetric(object):
"""COCO evaluation metric class."""
def __init__(self, filename, include_mask):
"""Constructs COCO evaluation class.
The class provides the interface to metrics_fn in TPUEstimator. The
_evaluate() loads a JSON file in COCO annotation format as the
groundtruths and runs COCO evaluation.
Args:
filename: Ground truth JSON file name. If filename is None, use
groundtruth data passed from the dataloader for evaluation.
include_mask: boolean to indicate whether or not to include mask eval.
"""
if filename:
if filename.startswith('gs://'):
_, local_val_json = tempfile.mkstemp(suffix='.json')
tf.io.gfile.remove(local_val_json)
tf.io.gfile.copy(filename, local_val_json)
atexit.register(tf.io.gfile.remove, local_val_json)
else:
local_val_json = filename
self.coco_gt = MaskCOCO(local_val_json)
self.filename = filename
self.metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
self._include_mask = include_mask
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self.metric_names]
self.metric_names.extend(mask_metric_names)
self._reset()
def _reset(self):
"""Reset COCO API object."""
if self.filename is None and not hasattr(self, 'coco_gt'):
self.coco_gt = MaskCOCO()
def predict_metric_fn(self,
predictions,
is_predict_image_mask=False,
groundtruth_data=None):
"""Generates COCO metrics."""
image_ids = list(set(predictions['source_id']))
if groundtruth_data is not None:
self.coco_gt.reset(groundtruth_data)
coco_dt = self.coco_gt.loadRes(
predictions, self._include_mask, is_image_mask=is_predict_image_mask)
coco_eval = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
if self._include_mask:
# Create another object for instance segmentation metric evaluation.
mcoco_eval = COCOeval(self.coco_gt, coco_dt, iouType='segm')
mcoco_eval.params.imgIds = image_ids
mcoco_eval.evaluate()
mcoco_eval.accumulate()
mcoco_eval.summarize()
mask_coco_metrics = mcoco_eval.stats
if self._include_mask:
metrics = np.hstack((coco_metrics, mask_coco_metrics))
else:
metrics = coco_metrics
# clean up after evaluation is done.
self._reset()
metrics = metrics.astype(np.float32)
metrics_dict = {}
for i, name in enumerate(self.metric_names):
metrics_dict[name] = metrics[i]
return metrics_dict
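# A minimal usage sketch (assumptions: the annotation path is a placeholder and
# `predictions` already follows the per-image layout expected by
# MaskCOCO.loadRes(), i.e. 'source_id' plus the detection arrays, with masks
# included when include_mask=True):
#
#   metric = EvaluationMetric(filename='/data/annotations/instances_val2017.json',
#                             include_mask=True)
#   metrics = metric.predict_metric_fn(predictions)
#   print(metrics['AP'], metrics['AP50'], metrics['mask_AP'])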
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and the number of
positive examples to normalize the loss during training.
"""
import functools
import math
import multiprocessing
import tensorflow as tf
from mask_rcnn.utils.logging_formatter import logging
from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
from mask_rcnn.utils.distributed_utils import MPI_rank
from mask_rcnn.utils.distributed_utils import MPI_size
# common functions
from mask_rcnn.dataloader_utils import dataset_parser
from distutils.version import LooseVersion
class InputReader(object):
"""Input reader for dataset."""
def __init__(
self,
file_pattern,
mode=tf.estimator.ModeKeys.TRAIN,
num_examples=0,
use_fake_data=False,
use_instance_mask=False,
seed=None
):
self._mode = mode
self._file_pattern = file_pattern
self._num_examples = num_examples
self._use_fake_data = use_fake_data
self._use_instance_mask = use_instance_mask
self._seed = seed
def _create_dataset_parser_fn(self, params):
"""Create parser for parsing input data (dictionary)."""
return functools.partial(
dataset_parser,
mode=self._mode,
params=params,
use_instance_mask=self._use_instance_mask,
seed=self._seed
)
def __call__(self, params, input_context=None):
batch_size = params['batch_size'] if 'batch_size' in params else 1
try:
seed = params['seed'] if not MPI_is_distributed() else params['seed'] * MPI_rank()
except (KeyError, TypeError):
seed = None
if MPI_is_distributed():
n_gpus = MPI_size()
elif input_context is not None:
n_gpus = input_context.num_input_pipelines
else:
n_gpus = 1
##################################################
dataset = tf.data.Dataset.list_files(
self._file_pattern,
shuffle=False
)
if self._mode == tf.estimator.ModeKeys.TRAIN:
if input_context is not None:
logging.info("Using Dataset Sharding with TF Distributed")
_num_shards = input_context.num_input_pipelines
_shard_idx = input_context.input_pipeline_id
elif MPI_is_distributed():
logging.info("Using Dataset Sharding with Horovod")
_shard_idx, _num_shards = MPI_rank_and_size()
try:
dataset = dataset.shard(
num_shards=_num_shards,
index=_shard_idx
)
dataset = dataset.shuffle(math.ceil(256 / _num_shards))
except NameError: # Not a distributed training setup
pass
def _prefetch_dataset(filename):
return tf.data.TFRecordDataset(filename).prefetch(1)
dataset = dataset.interleave(
map_func=_prefetch_dataset,
cycle_length=32,
block_length=64,
num_parallel_calls=tf.data.experimental.AUTOTUNE,
)
if self._num_examples is not None and self._num_examples > 0:
logging.info("[*] Limiting the amount of sample to: %d" % self._num_examples)
dataset = dataset.take(self._num_examples)
dataset = dataset.cache()
if self._mode == tf.estimator.ModeKeys.TRAIN:
dataset = dataset.shuffle(
buffer_size=4096,
reshuffle_each_iteration=True,
seed=seed
)
dataset = dataset.repeat()
# Parse the fetched records to input tensors for model function.
dataset = dataset.map(
map_func=self._create_dataset_parser_fn(params),
num_parallel_calls=tf.data.experimental.AUTOTUNE,
)
dataset = dataset.batch(
batch_size=batch_size,
drop_remainder=True
)
if self._use_fake_data:
# Turn this dataset into a semi-fake dataset which always loops over the
# first batch. This reduces variance in performance and is useful in
# testing.
logging.info("Using Fake Dataset Loop...")
dataset = dataset.take(1).cache().repeat()
if self._mode != tf.estimator.ModeKeys.TRAIN:
dataset = dataset.take(int(5000 / batch_size))
dataset = dataset.prefetch(
buffer_size=tf.data.experimental.AUTOTUNE,
)
if self._mode == tf.estimator.ModeKeys.PREDICT or n_gpus > 1:
if not tf.distribute.has_strategy():
dataset = dataset.apply(
tf.data.experimental.prefetch_to_device(
'/gpu:0', # With Horovod the local GPU is always 0
buffer_size=1,
)
)
data_options = tf.data.Options()
data_options.experimental_deterministic = seed is not None
if LooseVersion(tf.__version__) <= LooseVersion("2.0.0"):
data_options.experimental_distribute.auto_shard = False
else:
data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
# data_options.experimental_distribute.auto_shard = False
data_options.experimental_slack = True
data_options.experimental_threading.max_intra_op_parallelism = 1
# data_options.experimental_threading.private_threadpool_size = int(multiprocessing.cpu_count() / n_gpus) * 2
# ================= experimental_optimization ================= #
data_options.experimental_optimization.apply_default_optimizations = False
# data_options.experimental_optimization.autotune = True
data_options.experimental_optimization.filter_fusion = True
data_options.experimental_optimization.map_and_batch_fusion = True
data_options.experimental_optimization.map_and_filter_fusion = True
data_options.experimental_optimization.map_fusion = True
data_options.experimental_optimization.map_parallelization = True
map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
map_vectorization_options.enabled = True
map_vectorization_options.use_choose_fastest = True
data_options.experimental_optimization.map_vectorization = map_vectorization_options
data_options.experimental_optimization.noop_elimination = True
data_options.experimental_optimization.parallel_batch = True
data_options.experimental_optimization.shuffle_and_repeat_fusion = True
# ========== Stats on TF Data =============
# aggregator = tf.data.experimental.StatsAggregator()
# data_options.experimental_stats.aggregator = aggregator
# data_options.experimental_stats.latency_all_edges = True
dataset = dataset.with_options(data_options)
return dataset
if __name__ == "__main__":
'''
Data Loading Benchmark Usage:
# Real Data - Training
python -m mask_rcnn.dataloader \
--data_dir="/data/" \
--batch_size=2 \
--warmup_steps=200 \
--benchmark_steps=2000 \
--training
# Real Data - Inference
python -m mask_rcnn.dataloader \
--data_dir="/data/" \
--batch_size=8 \
--warmup_steps=200 \
--benchmark_steps=2000
# --------------- #
# Synthetic Data - Training
python -m mask_rcnn.dataloader \
--data_dir="/data/" \
--batch_size=2 \
--warmup_steps=200 \
--benchmark_steps=2000 \
--training \
--use_synthetic_data
# Synthetic Data - Inference
python -m mask_rcnn.dataloader \
--data_dir="/data/" \
--batch_size=8 \
--warmup_steps=200 \
--benchmark_steps=2000 \
--use_synthetic_data
# --------------- #
'''
import os
import time
import argparse
import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.compat.v1.disable_eager_execution()
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
logging.set_verbosity(logging.INFO)
parser = argparse.ArgumentParser(description="MaskRCNN Dataloader Benchmark")
parser.add_argument(
'--data_dir', required=True, type=str, help="Directory path which contains the preprocessed COCO dataset in TFRecord format"
)
parser.add_argument(
'--batch_size', default=64, type=int, required=True, help="""Batch size used to measure performance."""
)
parser.add_argument(
'--warmup_steps',
default=200,
type=int,
required=True,
help="""Number of steps considered as warmup and not taken into account for performance measurements."""
)
parser.add_argument(
'--benchmark_steps',
default=200,
type=int,
required=True,
help="Number of steps used to benchmark dataloading performance. Only used in training"
)
parser.add_argument(
'--seed',
default=666,
type=int,
required=False,
help="""Reproducibility Seed."""
)
parser.add_argument("--training", default=False, action="store_true", help="Benchmark in training mode")
parser.add_argument("--use_synthetic_data", default=False, action="store_true", help="Use synthetic dataset")
FLAGS, unknown_args = parser.parse_known_args()
if len(unknown_args) > 0:
for bad_arg in unknown_args:
print("ERROR: Unknown command line arg: %s" % bad_arg)
raise ValueError("Invalid command line arg(s)")
BURNIN_STEPS = FLAGS.warmup_steps
if FLAGS.training:
TOTAL_STEPS = FLAGS.warmup_steps + FLAGS.benchmark_steps
else:
TOTAL_STEPS = int(1e6) # Wait for end of dataset
if FLAGS.training:
input_dataset = InputReader(
file_pattern=os.path.join(FLAGS.data_dir, "train*.tfrecord"),
mode=tf.estimator.ModeKeys.TRAIN,
use_fake_data=FLAGS.use_synthetic_data,
use_instance_mask=True,
seed=FLAGS.seed
)
else:
input_dataset = InputReader(
file_pattern=os.path.join(FLAGS.data_dir, "val*.tfrecord"),
mode=tf.estimator.ModeKeys.PREDICT,
num_examples=5000,
use_fake_data=FLAGS.use_synthetic_data,
use_instance_mask=True,
seed=FLAGS.seed
)
logging.info("[*] Executing Benchmark in %s mode" % ("training" if FLAGS.training else "inference"))
logging.info("[*] Benchmark using %s data" % ("synthetic" if FLAGS.use_synthetic_data else "real"))
time.sleep(1)
# Build the data input
dataset = input_dataset(
params={
"anchor_scale": 8.0,
"aspect_ratios": [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
"batch_size": FLAGS.batch_size,
"gt_mask_size": 112,
"image_size": [1024, 1024],
"include_groundtruth_in_features": False,
"augment_input_data": True,
"max_level": 6,
"min_level": 2,
"num_classes": 91,
"num_scales": 1,
"rpn_batch_size_per_im": 256,
"rpn_fg_fraction": 0.5,
"rpn_min_size": 0.,
"rpn_nms_threshold": 0.7,
"rpn_negative_overlap": 0.3,
"rpn_positive_overlap": 0.7,
"rpn_post_nms_topn": 1000,
"rpn_pre_nms_topn": 2000,
"skip_crowd_during_training": True,
"use_category": True,
"visualize_images_summary": False,
}
)
dataset_iterator = dataset.make_initializable_iterator()
if FLAGS.training:
X, Y = dataset_iterator.get_next()
else:
X = dataset_iterator.get_next()
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = False
with tf.device("gpu:0"):
X_gpu_ops = list()
Y_gpu_ops = list()
if FLAGS.training:
for _, _x in X.items():
X_gpu_ops.append(tf.identity(_x))
for _, _y in Y.items():
Y_gpu_ops.append(tf.identity(_y))
else:
for _, _x in X["features"].items():
X_gpu_ops.append(tf.identity(_x))
with tf.control_dependencies(X_gpu_ops + Y_gpu_ops):
input_op = tf.constant(1.0)
with tf.compat.v1.Session(config=config) as sess:
sess.run(dataset_iterator.initializer)
sess.run(tf.compat.v1.global_variables_initializer())
total_files_processed = 0
img_per_sec_arr = []
processing_time_arr = []
processing_start_time = time.time()
for step in range(TOTAL_STEPS):
try:
start_time = time.time()
sess.run(input_op)
elapsed_time = (time.time() - start_time) * 1000
imgs_per_sec = (FLAGS.batch_size / elapsed_time) * 1000
total_files_processed += FLAGS.batch_size
if (step + 1) > BURNIN_STEPS:
processing_time_arr.append(elapsed_time)
img_per_sec_arr.append(imgs_per_sec)
if (step + 1) % 20 == 0 or (step + 1) == TOTAL_STEPS:
print(
"[STEP %04d] # Batch Size: %03d - Time: %03d msecs - Speed: %6d img/s" %
(step + 1, FLAGS.batch_size, elapsed_time, imgs_per_sec)
)
except tf.errors.OutOfRangeError:
break
processing_time = time.time() - processing_start_time
avg_processing_speed = np.mean(img_per_sec_arr)
print("\n###################################################################")
print("*** Data Loading Performance Metrics ***\n")
print("\t=> Number of Steps: %d" % (step + 1))
print("\t=> Batch Size: %d" % FLAGS.batch_size)
print("\t=> Files Processed: %d" % total_files_processed)
print("\t=> Total Execution Time: %d secs" % processing_time)
print("\t=> Median Time per step: %3d msecs" % np.median(processing_time_arr))
print("\t=> Median Processing Speed: %d images/secs" % np.median(img_per_sec_arr))
print("\t=> Median Processing Time: %.2f msecs/image" % (1 / float(np.median(img_per_sec_arr)) * 1000))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and the number of
positive examples to normalize the loss during training.
"""
import tensorflow as tf
from mask_rcnn import anchors
from mask_rcnn.utils import coco_utils
from mask_rcnn.ops import preprocess_ops
from mask_rcnn.object_detection import tf_example_decoder
MAX_NUM_INSTANCES = 100
MAX_NUM_VERTICES_PER_INSTANCE = 1500
MAX_NUM_POLYGON_LIST_LEN = 2 * MAX_NUM_VERTICES_PER_INSTANCE * MAX_NUM_INSTANCES
POLYGON_PAD_VALUE = coco_utils.POLYGON_PAD_VALUE
__all__ = [
# dataset parser
"dataset_parser",
# common functions
"preprocess_image",
"process_groundtruth_is_crowd",
"process_source_id",
# eval
"prepare_labels_for_eval",
# training
"augment_image",
"process_boxes_classes_indices_for_training",
"process_gt_masks_for_training",
"process_labels_for_training",
"process_targets_for_training"
]
###############################################################################################################
def dataset_parser(value, mode, params, use_instance_mask, seed=None, regenerate_source_id=False):
"""Parse data to a fixed dimension input image and learning targets.
Args:
value: A dictionary that contains an image and groundtruth annotations.
Returns:
features: a dictionary that contains the image and auxiliary
information. The following describes {key: value} pairs in the
dictionary.
image: Image tensor that is preprocessed to have normalized value and
fixed dimension [image_size, image_size, 3]
image_info: image information that includes the original height and
width, the scale of the processed image to the original image, and
the scaled height and width.
source_ids: Source image id. Default value -1 if the source id is
empty in the groundtruth annotation.
labels: a dictionary that contains auxiliary information plus (optional)
labels. The following describes {key: value} pairs in the dictionary.
`labels` is only for training.
score_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors]. The height_l and width_l
represent the dimension of objectiveness score at l-th level.
box_targets_dict: ordered dictionary with keys
[min_level, min_level+1, ..., max_level]. The values are tensor with
shape [height_l, width_l, num_anchors * 4]. The height_l and
width_l represent the dimension of bounding box regression output at
l-th level.
gt_boxes: Groundtruth bounding box annotations. The box is represented
in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
fixed dimension [MAX_NUM_INSTANCES, 4].
gt_classes: Groundtruth classes annotations. The tensor is padded
with -1 to the fixed dimension [MAX_NUM_INSTANCES].
cropped_gt_masks: groundtruth masks cropped by the bounding box and
resized to a fixed size determined by params['gt_mask_size']
regenerate_source_id: `bool`, if True TFExampleParser will use hashed
value of `image/encoded` for `image/source_id`.
"""
if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
raise ValueError("Unknown execution mode received: %s" % mode)
def create_example_decoder():
return tf_example_decoder.TfExampleDecoder(
use_instance_mask=use_instance_mask,
regenerate_source_id=regenerate_source_id
)
example_decoder = create_example_decoder()
with tf.xla.experimental.jit_scope(compile_ops=True):
with tf.name_scope('parser'):
data = example_decoder.decode(value)
data['groundtruth_is_crowd'] = process_groundtruth_is_crowd(data)
image = tf.image.convert_image_dtype(data['image'], dtype=tf.float32)
source_id = process_source_id(data['source_id'])
if mode == tf.estimator.ModeKeys.PREDICT:
features = {
'source_ids': source_id,
}
if params['visualize_images_summary']:
features['orig_images'] = tf.image.resize(image, params['image_size'])
features["images"], features["image_info"], _, _ = preprocess_image(
image,
boxes=None,
instance_masks=None,
image_size=params['image_size'],
max_level=params['max_level'],
augment_input_data=False,
seed=seed
)
if params['include_groundtruth_in_features']:
labels = prepare_labels_for_eval(
data,
target_num_instances=MAX_NUM_INSTANCES,
target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
use_instance_mask=params['include_mask']
)
return {'features': features, 'labels': labels}
else:
return {'features': features}
elif mode == tf.estimator.ModeKeys.TRAIN:
labels = {}
features = {
'source_ids': source_id
}
boxes, classes, indices, instance_masks = process_boxes_classes_indices_for_training(
data,
skip_crowd_during_training=params['skip_crowd_during_training'],
use_category=params['use_category'],
use_instance_mask=use_instance_mask
)
image, image_info, boxes, instance_masks = preprocess_image(
image,
boxes=boxes,
instance_masks=instance_masks,
image_size=params['image_size'],
max_level=params['max_level'],
augment_input_data=params['augment_input_data'],
seed=seed
)
features.update({
'images': image,
'image_info': image_info,
})
padded_image_size = image.get_shape().as_list()[:2]
# Pads cropped_gt_masks.
if use_instance_mask:
labels['cropped_gt_masks'] = process_gt_masks_for_training(
instance_masks,
boxes,
gt_mask_size=params['gt_mask_size'],
padded_image_size=padded_image_size,
max_num_instances=MAX_NUM_INSTANCES
)
with tf.xla.experimental.jit_scope(compile_ops=False):
# Assign anchors.
(score_targets, box_targets), input_anchor = process_targets_for_training(
padded_image_size=padded_image_size,
boxes=boxes,
classes=classes,
params=params
)
additional_labels = process_labels_for_training(
image_info, boxes, classes, score_targets, box_targets,
max_num_instances=MAX_NUM_INSTANCES,
min_level=params["min_level"],
max_level=params["max_level"]
)
labels.update(additional_labels)
# labels["input_anchor"] = input_anchor
# Features
# {
# 'source_ids': <tf.Tensor 'parser/StringToNumber:0' shape=() dtype=float32>,
# 'images': <tf.Tensor 'parser/pad_to_bounding_box/Squeeze:0' shape=(1024, 1024, 3) dtype=float32>,
# 'image_info': <tf.Tensor 'parser/stack_1:0' shape=(5,) dtype=float32>
# }
FAKE_FEATURES = False
if FAKE_FEATURES:
labels["source_ids"] = tf.ones(shape=(), dtype=tf.float32)
labels["images"] = tf.ones(shape=(1024, 1024, 3), dtype=tf.float32)
labels["image_info"] = tf.ones(shape=(5,), dtype=tf.float32)
# Labels
# {
# 'cropped_gt_masks': <tf.Tensor 'parser/Reshape_4:0' shape=(100, 116, 116) dtype=float32>,
# 'score_targets_2': <tf.Tensor 'parser/Reshape_9:0' shape=(256, 256, 3) dtype=int32>,
# 'box_targets_2': <tf.Tensor 'parser/Reshape_14:0' shape=(256, 256, 12) dtype=float32>,
# 'score_targets_3': <tf.Tensor 'parser/Reshape_10:0' shape=(128, 128, 3) dtype=int32>,
# 'box_targets_3': <tf.Tensor 'parser/Reshape_15:0' shape=(128, 128, 12) dtype=float32>,
# 'score_targets_4': <tf.Tensor 'parser/Reshape_11:0' shape=(64, 64, 3) dtype=int32>,
# 'box_targets_4': <tf.Tensor 'parser/Reshape_16:0' shape=(64, 64, 12) dtype=float32>,
# 'score_targets_5': <tf.Tensor 'parser/Reshape_12:0' shape=(32, 32, 3) dtype=int32>,
# 'box_targets_5': <tf.Tensor 'parser/Reshape_17:0' shape=(32, 32, 12) dtype=float32>,
# 'score_targets_6': <tf.Tensor 'parser/Reshape_13:0' shape=(16, 16, 3) dtype=int32>,
# 'box_targets_6': <tf.Tensor 'parser/Reshape_18:0' shape=(16, 16, 12) dtype=float32>,
# 'gt_boxes': <tf.Tensor 'parser/Reshape_20:0' shape=(100, 4) dtype=float32>,
# 'gt_classes': <tf.Tensor 'parser/Reshape_22:0' shape=(100, 1) dtype=float32>
# }
FAKE_LABELS = False
if FAKE_LABELS:
labels["cropped_gt_masks"] = tf.ones(shape=(100, 116, 116), dtype=tf.float32)
labels["gt_boxes"] = tf.ones(shape=(100, 4), dtype=tf.float32)
labels["gt_classes"] = tf.ones(shape=(100, 1), dtype=tf.float32)
idx = 1
for dim in [256, 128, 64, 32, 16]:
idx += 1 # Starts at 2
labels["score_targets_%d" % idx] = tf.ones(shape=(dim, dim, 3), dtype=tf.float32)
labels["box_targets_%d" % idx] = tf.ones(shape=(dim, dim, 12), dtype=tf.float32)
return features, labels
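# A minimal wiring sketch (this mirrors how InputReader in dataloader.py consumes
# the parser; the file pattern, seed and `params` dict below are placeholders):
#
#   import functools
#   parser_fn = functools.partial(
#       dataset_parser,
#       mode=tf.estimator.ModeKeys.TRAIN,
#       params=params,
#       use_instance_mask=True,
#       seed=42,
#   )
#   dataset = tf.data.TFRecordDataset(tf.io.gfile.glob("/data/train*.tfrecord"))
#   dataset = dataset.map(parser_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
#   dataset = dataset.batch(2, drop_remainder=True)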
###############################################################################################################
# common functions
def preprocess_image(image, boxes, instance_masks, image_size, max_level, augment_input_data=False, seed=None):
image = preprocess_ops.normalize_image(image)
if augment_input_data:
image, boxes, instance_masks = augment_image(image=image, boxes=boxes, instance_masks=instance_masks, seed=seed)
# Scaling and padding.
image, image_info, boxes, instance_masks = preprocess_ops.resize_and_pad(
image=image,
target_size=image_size,
stride=2 ** max_level,
boxes=boxes,
masks=instance_masks
)
return image, image_info, boxes, instance_masks
def process_groundtruth_is_crowd(data):
return tf.cond(
pred=tf.greater(tf.size(input=data['groundtruth_is_crowd']), 0),
true_fn=lambda: data['groundtruth_is_crowd'],
false_fn=lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)
)
# def process_source_id(data):
# source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id)
# source_id = tf.strings.to_number(source_id)
# return source_id
def process_source_id(source_id):
"""Processes source_id to the right format."""
if source_id.dtype == tf.string:
source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)
with tf.control_dependencies([source_id]):
source_id = tf.cond(
tf.equal(tf.size(source_id), 0),
lambda: tf.cast(tf.constant(-1), tf.int64),
lambda: tf.identity(source_id)
)
return source_id
# eval
def prepare_labels_for_eval(
data,
target_num_instances=MAX_NUM_INSTANCES,
target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
use_instance_mask=False
):
"""Create labels dict for infeed from data of tf.Example."""
image = data['image']
height, width = tf.shape(input=image)[:2]
boxes = data['groundtruth_boxes']
classes = tf.cast(data['groundtruth_classes'], dtype=tf.float32)
num_labels = tf.shape(input=classes)[0]
boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [target_num_instances, 4])
classes = preprocess_ops.pad_to_fixed_size(classes, -1, [target_num_instances, 1])
is_crowd = tf.cast(data['groundtruth_is_crowd'], dtype=tf.float32)
is_crowd = preprocess_ops.pad_to_fixed_size(is_crowd, 0, [target_num_instances, 1])
labels = dict()
labels['width'] = width
labels['height'] = height
labels['groundtruth_boxes'] = boxes
labels['groundtruth_classes'] = classes
labels['num_groundtruth_labels'] = num_labels
labels['groundtruth_is_crowd'] = is_crowd
if use_instance_mask:
data['groundtruth_polygons'] = preprocess_ops.pad_to_fixed_size(
data=data['groundtruth_polygons'],
pad_value=POLYGON_PAD_VALUE,
output_shape=[target_polygon_list_len, 1]
)
if 'groundtruth_area' in data:
labels['groundtruth_area'] = preprocess_ops.pad_to_fixed_size(
data=data['groundtruth_area'],
pad_value=0,
output_shape=[target_num_instances, 1]
)
return labels
# training
def augment_image(image, boxes, instance_masks, seed):
flipped_results = preprocess_ops.random_horizontal_flip(
image,
boxes=boxes,
masks=instance_masks,
seed=seed
)
if instance_masks is not None:
image, boxes, instance_masks = flipped_results
else:
image, boxes = flipped_results
# image = tf.image.random_brightness(image, max_delta=0.1, seed=seed)
# image = tf.image.random_contrast(image, lower=0.9, upper=1.1, seed=seed)
# image = tf.image.random_saturation(image, lower=0.9, upper=1.1, seed=seed)
# image = tf.image.random_jpeg_quality(image, min_jpeg_quality=80, max_jpeg_quality=100, seed=seed)
return image, boxes, instance_masks
def process_boxes_classes_indices_for_training(data, skip_crowd_during_training, use_category, use_instance_mask):
boxes = data['groundtruth_boxes']
classes = data['groundtruth_classes']
classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
indices = None
instance_masks = None
if not use_category:
classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)
if skip_crowd_during_training:
indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
classes = tf.gather_nd(classes, indices)
boxes = tf.gather_nd(boxes, indices)
if use_instance_mask:
instance_masks = tf.gather_nd(data['groundtruth_instance_masks'], indices)
return boxes, classes, indices, instance_masks
def process_gt_masks_for_training(instance_masks, boxes, gt_mask_size, padded_image_size, max_num_instances):
cropped_gt_masks = preprocess_ops.crop_gt_masks(
instance_masks=instance_masks,
boxes=boxes,
gt_mask_size=gt_mask_size,
image_size=padded_image_size
)
# cropped_gt_masks = tf.reshape(cropped_gt_masks, [max_num_instances, -1])
cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
data=cropped_gt_masks,
pad_value=-1,
output_shape=[max_num_instances, (gt_mask_size + 4) ** 2]
)
return tf.reshape(cropped_gt_masks, [max_num_instances, gt_mask_size + 4, gt_mask_size + 4])
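# For example, with the benchmark defaults used in dataloader.py
# (gt_mask_size=112, MAX_NUM_INSTANCES=100) the padded masks come back as a
# [100, 116, 116] tensor, matching the 'cropped_gt_masks' shape noted in the
# comments inside dataset_parser above.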
def process_labels_for_training(
image_info, boxes, classes,
score_targets, box_targets,
max_num_instances, min_level, max_level
):
labels = {}
# Pad groundtruth data.
# boxes *= image_info[2]
boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [max_num_instances, 4])
classes = preprocess_ops.pad_to_fixed_size(classes, -1, [max_num_instances, 1])
for level in range(min_level, max_level + 1):
labels['score_targets_%d' % level] = score_targets[level]
labels['box_targets_%d' % level] = box_targets[level]
labels['gt_boxes'] = boxes
labels['gt_classes'] = classes
return labels
def process_targets_for_training(padded_image_size, boxes, classes, params):
input_anchors = anchors.Anchors(
params['min_level'],
params['max_level'],
params['num_scales'],
params['aspect_ratios'],
params['anchor_scale'],
padded_image_size
)
anchor_labeler = anchors.AnchorLabeler(
input_anchors,
params['num_classes'],
params['rpn_positive_overlap'],
params['rpn_negative_overlap'],
params['rpn_batch_size_per_im'],
params['rpn_fg_fraction']
)
return anchor_labeler.label_anchors(boxes, classes), input_anchors
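# A minimal usage sketch (the `params` dict is assumed to carry the same keys as
# the benchmark defaults in dataloader.py: min/max_level, num_scales,
# aspect_ratios, anchor_scale, num_classes and the rpn_* settings):
#
#   (score_targets, box_targets), input_anchors = process_targets_for_training(
#       padded_image_size=[1024, 1024],
#       boxes=boxes,      # as returned by preprocess_image(...)
#       classes=classes,  # as returned by process_boxes_classes_indices_for_training(...)
#       params=params,
#   )
#   # With min_level=2, num_scales=1 and 3 aspect ratios, score_targets[2] has
#   # shape [256, 256, 3] and box_targets[2] has shape [256, 256, 12], matching
#   # the per-level comments in dataset_parser.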