dcuai / dlexamples · Commits

Commit c320b6ef, authored Apr 15, 2022 by zhenyi

    tf2 detection

Parent: 0fc002df
Showing 20 changed files with 3141 additions and 0 deletions (+3141 -0):
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/.gitignore  +3 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/MaskRCNN.iml  +12 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/inspectionProfiles/profiles_settings.xml  +6 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/misc.xml  +4 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/modules.xml  +8 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/1.py  +21 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/Dockerfile  +57 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/LICENSE  +201 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md  +82 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md-org  +637 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/create_coco_tf_record.py  +336 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/download_and_preprocess_coco.sh  +129 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/download_and_process_pretrained_weights.sh  +73 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF1_conv.png  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF2_conv.png  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/__init__.py  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/anchors.py  +289 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/coco_metric.py  +351 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader.py  +466 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader_utils.py  +466 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/.gitignore  (new file, 100644)

```
# Default ignored files
/shelf/
/workspace.xml
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/MaskRCNN.iml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="PLAIN" />
    <option name="myDocStringFormat" value="Plain" />
  </component>
</module>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/inspectionProfiles/profiles_settings.xml  (new file, 100644)

```
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/misc.xml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/.idea/modules.xml  (new file, 100644)

```
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/MaskRCNN.iml" filepath="$PROJECT_DIR$/.idea/MaskRCNN.iml" />
    </modules>
  </component>
</project>
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/1.py  (new file, 100644)

```
import torch
import torchvision

# Report the installed versions and the visible GPUs.
print(torch.__version__)
print(torchvision.__version__)
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

# Three boxes in (x1, y1, x2, y2) form and their confidence scores;
# boxes 1 and 2 are nearly identical.
a = torch.Tensor([[1, 1, 2, 2], [1, 1, 3.100001, 3], [1, 1, 3.1, 3]])
b = torch.Tensor([0.9, 0.98, 0.980005])

from torchvision.ops import nms

# Non-maximum suppression with an IoU threshold of 0.4.
ccc = nms(a, b, 0.4)
print(ccc)
print(a[ccc])
```
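For reference, this sanity check should keep boxes 2 and 0: boxes 1 and 2 overlap almost completely, so only the higher-scoring box 2 survives, while box 0 overlaps box 2 with an IoU of roughly 0.24, below the 0.4 threshold. The printed index tensor should therefore be [2, 0], followed by those two boxes.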
TensorFlow2x/ComputeVision/Detection/MaskRCNN/Dockerfile  (new file, 100644)

```
#===============================================================================
#
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================

ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.06-tf1-py3
FROM ${FROM_IMAGE_NAME}

ENV DEBIAN_FRONTEND=noninteractive

RUN rm -rf /workspace && mkdir -p /workspace
ADD . /workspace
WORKDIR /workspace

RUN apt-get update && \
    apt-get install -y libsm6 libxext6 libxrender-dev python3-tk cmake && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Make sure python and pip points to pip3 and python3
RUN python -m pip install --upgrade pip && \
    pip --no-cache-dir --no-cache install \
        Cython \
        matplotlib \
        opencv-python-headless \
        mpi4py \
        Pillow \
        pytest \
        pyyaml && \
    git clone https://github.com/pybind/pybind11 /opt/pybind11 && \
    cd /opt/pybind11 && cmake . && make install && pip install . && \
    pip --no-cache-dir --no-cache install \
        'git+https://github.com/NVIDIA/cocoapi#egg=pycocotools&subdirectory=PythonAPI' && \
    pip --no-cache-dir --no-cache install \
        'git+https://github.com/NVIDIA/dllogger'

# Update protobuf 3 to 3.3.0
RUN \
    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip && \
    unzip -u protoc-3.3.0-linux-x86_64.zip -d protoc3 && \
    mv protoc3/bin/* /usr/local/bin/ && \
    mv protoc3/include/* /usr/local/include/
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/LICENSE  (new file, 100644)
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2019 NVIDIA Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md  (new file, 100644)

# Introduction
* Train a Mask R-CNN model with TensorFlow
<br>

# Environment setup
## 1) Install packages
* Install tensorflow1.15 in the rocm3.3 environment
* Install pycocotools
  pip3 install pycocotools -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Update pandas
  pip3 install -U pandas -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
* Install dllogger
  git clone --recursive https://github.com/NVIDIA/dllogger.git
  python3 setup.py install
<br>

## 2) Data preprocessing (train and val)
```
cd dataset/
git clone http://github.com/tensorflow/models tf-models
cd tf-models/research
wget -O protobuf.zip https://github.com/google/protobuf/releases/download/v3.0.0/protoc-3.0.0-linux-x86_64.zip
unzip protobuf.zip
./bin/protoc object_detection/protos/*.proto --python_out=.
```
Return to the dataset directory, open create_coco_tf_record.py (vim create_coco_tf_record.py) and comment out lines 310 and 316.
<br>
```
PYTHONPATH="tf-models:tf-models/research" python3 create_coco_tf_record.py \
--logtostderr \
--include_masks \
--train_image_dir=/path/to/COCO2017/images/train2017 \
--val_image_dir=/path/to/COCO2017/images/val2017 \
--train_object_annotations_file=/path/to/COCO2017/annotations/instances_train2017.json \
--val_object_annotations_file=/path/to/COCO2017/annotations/instances_val2017.json \
--train_caption_annotations_file=/path/to/COCO2017/annotations/captions_train2017.json \
--val_caption_annotations_file=/path/to/COCO2017/annotations/captions_val2017.json \
--output_dir=coco2017_tfrecord
```
This produces the coco2017_tfrecord directory.

## 3) Download the pretrained models
<br>
The resulting weight files are laid out as follows:
```
weights/
>mask-rcnn/1555659850/
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/saved_model.pb
>>variables/
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.data-00000-of-00001
https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850/variables/variables.index
>resnet/
>>extracted_from_maskrcnn/
>>resnet-nhwc-2018-02-07/
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/checkpoint
>>>model.ckpt-112603/
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.data-00000-of-00001
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.index
https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07/model.ckpt-112603.meta
>>resnet-nhwc-2018-10-14/
```
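The checkpoint files listed above are the same ones fetched by download_and_process_pretrained_weights.sh, which is added in this commit and downloads them with wget -N into mask-rcnn/1555659850/ and resnet/resnet-nhwc-2018-02-07/, so that script can be used to populate this directory tree.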
# Testing
## Single-card training
```
python3 scripts/benchmark_training.py --gpus {1,4,8} --batch_size {2,4}
python3 scripts/benchmark_training.py --gpus 1 --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Multi-card training
```
python3 scripts/benchmark_training.py --gpus 2 --batch_size 4 --model_dir save_model_2dcu --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
## Inference
```
python3 scripts/benchmark_inference.py --batch_size 2 --model_dir save_model --data_dir /public/home/tianlh/AI-application/Tensorflow/MaskRCNN_tf2/dataset/coco2017_tfrecord --weights_dir weights
```
# References
[https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/README.md-org  (new file, 100644; diff collapsed in this view, +637 lines not shown)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/create_coco_tf_record.py  (new file, 100644)

```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.

Example usage:
    python create_coco_tf_record.py --logtostderr \
      --train_image_dir="${TRAIN_IMAGE_DIR}" \
      --val_image_dir="${VAL_IMAGE_DIR}" \
      --test_image_dir="${TEST_IMAGE_DIR}" \
      --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
      --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
      --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
      --output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import hashlib
import io
import json
import multiprocessing
import os

from absl import app
from absl import flags
import numpy as np
import PIL.Image

from pycocotools import mask
from research.object_detection.utils import dataset_util
from research.object_detection.utils import label_map_util
import tensorflow as tf

flags.DEFINE_boolean(
    'include_masks', False, 'Whether to include instance segmentations masks '
    '(PNG encoded) in the result. default: False.')
flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
flags.DEFINE_string('train_object_annotations_file', '', '')
flags.DEFINE_string('val_object_annotations_file', '', '')
flags.DEFINE_string('train_caption_annotations_file', '', '')
flags.DEFINE_string('val_caption_annotations_file', '', '')
flags.DEFINE_string('testdev_annotations_file', '', 'Test-dev annotations JSON file.')
flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)


def create_tf_example(image,
                      bbox_annotations,
                      caption_annotations,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
       u'date_captured', u'flickr_url', u'id']
    bbox_annotations:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
       u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative
      to image size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.io.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in bbox_annotations:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())

  captions = []
  for caption_annotation in caption_annotations:
    captions.append(caption_annotation['caption'].encode('utf8'))

  feature_dict = {
      'image/height': dataset_util.int64_feature(image_height),
      'image/width': dataset_util.int64_feature(image_width),
      'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/caption': dataset_util.bytes_list_feature(captions),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(category_names),
      'image/object/class/label': dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
      'image/object/area': dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped


def _pool_create_tf_example(args):
  return create_tf_example(*args)


def _load_object_annotations(object_annotations_file):
  with tf.io.gfile.GFile(object_annotations_file, 'r') as fid:
    obj_annotations = json.load(fid)

  images = obj_annotations['images']
  category_index = label_map_util.create_category_index(
      obj_annotations['categories'])

  img_to_obj_annotation = collections.defaultdict(list)
  tf.compat.v1.logging.info('Building bounding box index.')
  for annotation in obj_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_obj_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  for image in images:
    image_id = image['id']
    if image_id not in img_to_obj_annotation:
      missing_annotation_count += 1
  tf.compat.v1.logging.info('%d images are missing bboxes.',
                            missing_annotation_count)
  return images, img_to_obj_annotation, category_index


def _load_caption_annotations(caption_annotations_file):
  with tf.io.gfile.GFile(caption_annotations_file, 'r') as fid:
    caption_annotations = json.load(fid)

  img_to_caption_annotation = collections.defaultdict(list)
  tf.compat.v1.logging.info('Building caption index.')
  for annotation in caption_annotations['annotations']:
    image_id = annotation['image_id']
    img_to_caption_annotation[image_id].append(annotation)

  missing_annotation_count = 0
  images = caption_annotations['images']
  for image in images:
    image_id = image['id']
    if image_id not in img_to_caption_annotation:
      missing_annotation_count += 1
  tf.compat.v1.logging.info('%d images are missing captions.',
                            missing_annotation_count)
  return img_to_caption_annotation


def _create_tf_record_from_coco_annotations(object_annotations_file,
                                            caption_annotations_file,
                                            image_dir, output_path,
                                            include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    object_annotations_file: JSON file containing bounding box annotations.
    caption_annotations_file: JSON file containing caption annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: Number of output files to create.
  """
  tf.compat.v1.logging.info('writing to output path: %s', output_path)
  writers = [
      tf.io.TFRecordWriter(output_path + '-%05d-of-%05d.tfrecord' %
                           (i, num_shards)) for i in range(num_shards)
  ]

  images, img_to_obj_annotation, category_index = (
      _load_object_annotations(object_annotations_file))
  img_to_caption_annotation = (
      _load_caption_annotations(caption_annotations_file))

  pool = multiprocessing.Pool()
  total_num_annotations_skipped = 0
  for idx, (_, tf_example, num_annotations_skipped) in enumerate(
      pool.imap(_pool_create_tf_example,
                [(image, img_to_obj_annotation[image['id']],
                  img_to_caption_annotation[image['id']], image_dir,
                  category_index, include_masks) for image in images])):
    if idx % 100 == 0:
      tf.compat.v1.logging.info('On image %d of %d', idx, len(images))

    total_num_annotations_skipped += num_annotations_skipped
    writers[idx % num_shards].write(tf_example.SerializeToString())

  pool.close()
  pool.join()

  for writer in writers:
    writer.close()

  tf.compat.v1.logging.info('Finished writing, skipped %d annotations.',
                            total_num_annotations_skipped)


def main(_):
  assert FLAGS.train_image_dir, '`train_image_dir` missing.'
  assert FLAGS.val_image_dir, '`val_image_dir` missing.'
  assert FLAGS.test_image_dir, '`test_image_dir` missing.'

  if not tf.io.gfile.isdir(FLAGS.output_dir):
    tf.io.gfile.makedirs(FLAGS.output_dir)

  train_output_path = os.path.join(FLAGS.output_dir, 'train')
  val_output_path = os.path.join(FLAGS.output_dir, 'val')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'test-dev')

  _create_tf_record_from_coco_annotations(
      FLAGS.train_object_annotations_file,
      FLAGS.train_caption_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks,
      num_shards=256)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_object_annotations_file,
      FLAGS.val_caption_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks,
      num_shards=32)


if __name__ == '__main__':
  tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
  app.run(main)
```
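Not part of this commit, but a minimal sketch of how the shards written by this script can be read back for a quick sanity check. It assumes TensorFlow 2.x, that --output_dir was coco2017_tfrecord as in the README (so the shard name below is hypothetical), and it parses only a subset of the feature keys defined in feature_dict above:

```
import tensorflow as tf

# Hypothetical shard path; adjust to wherever --output_dir pointed.
shard = "coco2017_tfrecord/train-00000-of-00256.tfrecord"

features = {
    "image/encoded": tf.io.FixedLenFeature([], tf.string),
    "image/source_id": tf.io.FixedLenFeature([], tf.string),
    "image/height": tf.io.FixedLenFeature([], tf.int64),
    "image/width": tf.io.FixedLenFeature([], tf.int64),
    "image/object/bbox/xmin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymin": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/xmax": tf.io.VarLenFeature(tf.float32),
    "image/object/bbox/ymax": tf.io.VarLenFeature(tf.float32),
    "image/object/class/label": tf.io.VarLenFeature(tf.int64),
}

# Decode the first two examples and print basic shapes.
for raw in tf.data.TFRecordDataset(shard).take(2):
    ex = tf.io.parse_single_example(raw, features)
    img = tf.io.decode_jpeg(ex["image/encoded"])              # HWC uint8 image
    xmins = tf.sparse.to_dense(ex["image/object/bbox/xmin"])  # normalized coords
    print(ex["image/source_id"].numpy(), img.shape, xmins.shape)
```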
TensorFlow2x/ComputeVision/Detection/MaskRCNN/dataset/download_and_preprocess_coco.sh  (new file, 100644)

```
#!/bin/bash
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Script to download and preprocess the COCO data set for detection.
#
# The outputs of this script are TFRecord files containing serialized
# tf.Example protocol buffers. See create_coco_tf_record.py for details of how
# the tf.Example protocol buffers are constructed and see
# http://cocodataset.org/#overview for an overview of the dataset.
#
# usage:
#  bash download_and_preprocess_coco.sh /data-dir/coco

set -e
set -x

if [ -z "$1" ]; then
  echo "usage download_and_preprocess_coco.sh [data dir]"
  exit
fi

#sudo apt install -y protobuf-compiler python-pil python-lxml\
#  python-pip python-dev git unzip
#pip install Cython git+https://github.com/cocodataset/cocoapi#subdirectory=PythonAPI

echo "Cloning Tensorflow models directory (for conversion utilities)"
if [ ! -e tf-models ]; then
  git clone http://github.com/tensorflow/models tf-models
fi

(cd tf-models/research && protoc object_detection/protos/*.proto --python_out=.)

UNZIP="unzip -nq"

# Create the output directories.
OUTPUT_DIR="${1%/}"
SCRATCH_DIR="${OUTPUT_DIR}/raw-data"
mkdir -p "${OUTPUT_DIR}"
mkdir -p "${SCRATCH_DIR}"
CURRENT_DIR=$(pwd)

# Helper function to download and unpack a .zip file.
function download_and_unzip() {
  local BASE_URL=${1}
  local FILENAME=${2}

  if [ ! -f ${FILENAME} ]; then
    echo "Downloading ${FILENAME} to $(pwd)"
    wget -nd -c "${BASE_URL}/${FILENAME}"
  else
    echo "Skipping download of ${FILENAME}"
  fi
  echo "Unzipping ${FILENAME}"
  ${UNZIP} ${FILENAME}
}

cd ${SCRATCH_DIR}

# Download the images.
BASE_IMAGE_URL="http://images.cocodataset.org/zips"

TRAIN_IMAGE_FILE="train2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TRAIN_IMAGE_FILE}
TRAIN_IMAGE_DIR="${SCRATCH_DIR}/train2017"

VAL_IMAGE_FILE="val2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${VAL_IMAGE_FILE}
VAL_IMAGE_DIR="${SCRATCH_DIR}/val2017"

TEST_IMAGE_FILE="test2017.zip"
download_and_unzip ${BASE_IMAGE_URL} ${TEST_IMAGE_FILE}
TEST_IMAGE_DIR="${SCRATCH_DIR}/test2017"

# Download the annotations.
BASE_INSTANCES_URL="http://images.cocodataset.org/annotations"
INSTANCES_FILE="annotations_trainval2017.zip"
download_and_unzip ${BASE_INSTANCES_URL} ${INSTANCES_FILE}

TRAIN_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_train2017.json"
VAL_OBJ_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/instances_val2017.json"
TRAIN_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_train2017.json"
VAL_CAPTION_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/captions_val2017.json"

# Download the test image info.
BASE_IMAGE_INFO_URL="http://images.cocodataset.org/annotations"
IMAGE_INFO_FILE="image_info_test2017.zip"
download_and_unzip ${BASE_IMAGE_INFO_URL} ${IMAGE_INFO_FILE}

TESTDEV_ANNOTATIONS_FILE="${SCRATCH_DIR}/annotations/image_info_test-dev2017.json"

# # Build TFRecords of the image data.
cd "${CURRENT_DIR}"

# Setup packages
touch tf-models/__init__.py
touch tf-models/research/__init__.py

# Run our conversion
SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
PYTHONPATH="tf-models:tf-models/research" python $SCRIPT_DIR/create_coco_tf_record.py \
  --logtostderr \
  --include_masks \
  --train_image_dir="${TRAIN_IMAGE_DIR}" \
  --val_image_dir="${VAL_IMAGE_DIR}" \
  --test_image_dir="${TEST_IMAGE_DIR}" \
  --train_object_annotations_file="${TRAIN_OBJ_ANNOTATIONS_FILE}" \
  --val_object_annotations_file="${VAL_OBJ_ANNOTATIONS_FILE}" \
  --train_caption_annotations_file="${TRAIN_CAPTION_ANNOTATIONS_FILE}" \
  --val_caption_annotations_file="${VAL_CAPTION_ANNOTATIONS_FILE}" \
  --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
  --output_dir="${OUTPUT_DIR}"

mv ${SCRATCH_DIR}/annotations/ ${OUTPUT_DIR}
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/download_and_process_pretrained_weights.sh  (new file, 100644)

```
#!/usr/bin/env bash
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

mkdir -p /model
cd /model

# DOWNLOAD CHECKPOINTS

## Mask RCNN
## ====================== Mask RCNN ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/mask-rcnn/1555659850"
DEST_DIR="mask-rcnn/1555659850"

wget -N ${BASE_URL}/saved_model.pb -P ${DEST_DIR}
wget -N ${BASE_URL}/variables/variables.data-00000-of-00001 -P ${DEST_DIR}/variables
wget -N ${BASE_URL}/variables/variables.index -P ${DEST_DIR}/variables

## ====================== resnet-nhwc-2018-02-07 ====================== ##
BASE_URL="https://storage.googleapis.com/cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07"
DEST_DIR="resnet/resnet-nhwc-2018-02-07"

wget -N ${BASE_URL}/checkpoint -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.data-00000-of-00001 -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.index -P ${DEST_DIR}
wget -N ${BASE_URL}/model.ckpt-112603.meta -P ${DEST_DIR}

## ====================== resnet-nhwc-2018-10-14 ====================== ##
#BASE_URL="https://storage.googleapis.com/cloud-tpu-artifacts/resnet/resnet-nhwc-2018-10-14"
#DEST_DIR="resnet/resnet-nhwc-2018-10-14"
#
#wget -N ${BASE_URL}/model.ckpt-112602.data-00000-of-00001 -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.index -P ${DEST_DIR}
#wget -N ${BASE_URL}/model.ckpt-112602.meta -P ${DEST_DIR}

# VERIFY CHECKPOINTS
echo "Verifying and Processing Checkpoints..."

python pb_to_ckpt.py \
  --frozen_model_filename=mask-rcnn/1555659850/ \
  --output_filename=mask-rcnn/1555659850/ckpt/model.ckpt

python extract_RN50_weights.py \
  --checkpoint_dir=mask-rcnn/1555659850/ckpt/model.ckpt \
  --save_to=resnet/extracted_from_maskrcnn

echo "Generating list of tensors and their shape..."

python inspect_checkpoint.py --file_name=mask-rcnn/1555659850/ckpt/model.ckpt \
  > mask-rcnn/1555659850/tensors_and_shape.txt

python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-02-07/model.ckpt-112603 \
  > resnet/resnet-nhwc-2018-02-07/tensors_and_shape.txt

#python inspect_checkpoint.py --file_name=resnet/resnet-nhwc-2018-10-14/model.ckpt-112602 \
#  > resnet/resnet-nhwc-2018-10-14/tensors_and_shape.txt

python inspect_checkpoint.py --file_name=resnet/extracted_from_maskrcnn/resnet50.ckpt \
  > resnet/extracted_from_maskrcnn/tensors_and_shape.txt

echo "Script Finished with Success"
```
TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF1_conv.png  (new file, 100644, binary image, 60.9 KB)

TensorFlow2x/ComputeVision/Detection/MaskRCNN/images/MaskRCNN_TF2_conv.png  (new file, 100644, binary image, 62.9 KB)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/__init__.py  (new file, 100644, empty)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/anchors.py  (new file, 100644)

```
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mask-RCNN anchor definition."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import OrderedDict

import numpy as np
import tensorflow as tf

from mask_rcnn.object_detection import argmax_matcher
from mask_rcnn.object_detection import balanced_positive_negative_sampler
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import faster_rcnn_box_coder
from mask_rcnn.object_detection import region_similarity_calculator
from mask_rcnn.object_detection import target_assigner


def _generate_anchor_configs(min_level, max_level, num_scales, aspect_ratios):
  """Generates mapping from output level to a list of anchor configurations.

  A configuration is a tuple of (num_anchors, scale, aspect_ratio).

  Args:
    min_level: integer number of minimum level of the output feature pyramid.
    max_level: integer number of maximum level of the output feature pyramid.
    num_scales: integer number representing intermediate scales added
      on each level. For instances, num_scales=2 adds two additional
      anchor scales [2^0, 2^0.5] on each level.
    aspect_ratios: list of tuples representing the aspect ratio anchors added
      on each level. For instances, aspect_ratios =
      [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.

  Returns:
    anchor_configs: a dictionary with keys as the levels of anchors and
      values as a list of anchor configuration.
  """
  anchor_configs = {}
  for level in range(min_level, max_level + 1):
    anchor_configs[level] = []
    for scale_octave in range(num_scales):
      for aspect in aspect_ratios:
        anchor_configs[level].append(
            (2**level, scale_octave / float(num_scales), aspect))
  return anchor_configs


def _generate_anchor_boxes(image_size, anchor_scale, anchor_configs):
  """Generates multiscale anchor boxes.

  Args:
    image_size: integer number of input image size. The input image has the
      same dimension for width and height. The image_size should be divided by
      the largest feature stride 2^max_level.
    anchor_scale: float number representing the scale of size of the base
      anchor to the feature stride 2^level.
    anchor_configs: a dictionary with keys as the levels of anchors and
      values as a list of anchor configuration.

  Returns:
    anchor_boxes: a numpy array with shape [N, 4], which stacks anchors on all
      feature levels.

  Raises:
    ValueError: input size must be the multiple of largest feature stride.
  """
  boxes_all = []
  for _, configs in anchor_configs.items():
    boxes_level = []
    for config in configs:
      stride, octave_scale, aspect = config
      if image_size[0] % stride != 0 or image_size[1] % stride != 0:
        raise ValueError('input size must be divided by the stride.')
      base_anchor_size = anchor_scale * stride * 2**octave_scale
      anchor_size_x_2 = base_anchor_size * aspect[0] / 2.0
      anchor_size_y_2 = base_anchor_size * aspect[1] / 2.0

      x = np.arange(stride / 2, image_size[1], stride)
      y = np.arange(stride / 2, image_size[0], stride)
      xv, yv = np.meshgrid(x, y)
      xv = xv.reshape(-1)
      yv = yv.reshape(-1)

      boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
                         yv + anchor_size_y_2, xv + anchor_size_x_2))
      boxes = np.swapaxes(boxes, 0, 1)
      boxes_level.append(np.expand_dims(boxes, axis=1))

    # concat anchors on the same level to the reshape NxAx4
    boxes_level = np.concatenate(boxes_level, axis=1)
    boxes_all.append(boxes_level.reshape([-1, 4]))

  anchor_boxes = np.vstack(boxes_all)
  return anchor_boxes


class Anchors(object):
  """Mask-RCNN Anchors class."""

  def __init__(self, min_level, max_level, num_scales, aspect_ratios,
               anchor_scale, image_size):
    """Constructs multiscale Mask-RCNN anchors.

    Args:
      min_level: integer number of minimum level of the output feature pyramid.
      max_level: integer number of maximum level of the output feature pyramid.
      num_scales: integer number representing intermediate scales added
        on each level. For instances, num_scales=2 adds two additional
        anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: list of tuples representing the aspect ratio anchors added
        on each level. For instances, aspect_ratios =
        [(1, 1), (1.4, 0.7), (0.7, 1.4)] adds three anchors on each level.
      anchor_scale: float number representing the scale of size of the base
        anchor to the feature stride 2^level.
      image_size: integer number of input image size. The input image has the
        same dimension for width and height. The image_size should be divided
        by the largest feature stride 2^max_level.
    """
    self.min_level = min_level
    self.max_level = max_level
    self.num_scales = num_scales
    self.aspect_ratios = aspect_ratios
    self.anchor_scale = anchor_scale
    self.image_size = image_size
    self.config = self._generate_configs()
    self.boxes = self._generate_boxes()

  def _generate_configs(self):
    """Generate configurations of anchor boxes."""
    return _generate_anchor_configs(self.min_level, self.max_level,
                                    self.num_scales, self.aspect_ratios)

  def _generate_boxes(self):
    """Generates multiscale anchor boxes."""
    boxes = _generate_anchor_boxes(self.image_size, self.anchor_scale,
                                   self.config)
    boxes = tf.convert_to_tensor(value=boxes, dtype=tf.float32)
    return boxes

  def get_anchors_per_location(self):
    return self.num_scales * len(self.aspect_ratios)

  def get_unpacked_boxes(self):
    return self.unpack_labels(self.boxes)

  def unpack_labels(self, labels):
    """Unpacks an array of labels into multiscales labels."""
    labels_unpacked = OrderedDict()
    count = 0
    for level in range(self.min_level, self.max_level + 1):
      feat_size0 = int(self.image_size[0] / 2**level)
      feat_size1 = int(self.image_size[1] / 2**level)
      steps = feat_size0 * feat_size1 * self.get_anchors_per_location()
      indices = tf.range(count, count + steps)
      count += steps
      labels_unpacked[level] = tf.reshape(
          tf.gather(labels, indices), [feat_size0, feat_size1, -1])
    return labels_unpacked


class AnchorLabeler(object):
  """Labeler for multiscale anchor boxes."""

  def __init__(self,
               anchors,
               num_classes,
               match_threshold=0.7,
               unmatched_threshold=0.3,
               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5):
    """Constructs anchor labeler to assign labels to anchors.

    Args:
      anchors: an instance of class Anchors.
      num_classes: integer number representing number of classes in the dataset.
      match_threshold: a float number between 0 and 1 representing the
        lower-bound threshold to assign positive labels for anchors. An anchor
        with a score over the threshold is labeled positive.
      unmatched_threshold: a float number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
      rpn_batch_size_per_im: a integer number that represents the number of
        sampled anchors per image in the first stage (region proposal network).
      rpn_fg_fraction: a float number between 0 and 1 representing the fraction
        of positive anchors (foreground) in the first stage.
    """
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        match_threshold,
        unmatched_threshold=unmatched_threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()

    self._target_assigner = target_assigner.TargetAssigner(
        similarity_calc, matcher, box_coder)
    self._anchors = anchors
    self._match_threshold = match_threshold
    self._unmatched_threshold = unmatched_threshold
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction
    self._num_classes = num_classes

  def _get_rpn_samples(self, match_results):
    """Computes anchor labels.

    This function performs subsampling for foreground (fg) and background (bg)
    anchors.

    Args:
      match_results: A integer tensor with shape [N] representing the
        matching results of anchors. (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Returns:
      score_targets: a integer tensor with the a shape of [N].
        (1) score_targets[i]=1, the anchor is a positive sample.
        (2) score_targets[i]=0, negative. (3) score_targets[i]=-1, the anchor is
        don't care (ignore).
    """
    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=self._rpn_fg_fraction, is_static=False))
    # indicator includes both positive and negative labels.
    # labels includes only positives labels.
    # positives = indicator & labels.
    # negatives = indicator & !labels.
    # ignore = !indicator.
    indicator = tf.greater(match_results, -2)
    labels = tf.greater(match_results, -1)

    samples = sampler.subsample(indicator, self._rpn_batch_size_per_im, labels)
    positive_labels = tf.where(
        tf.logical_and(samples, labels),
        tf.constant(2, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    negative_labels = tf.where(
        tf.logical_and(samples, tf.logical_not(labels)),
        tf.constant(1, dtype=tf.int32, shape=match_results.shape),
        tf.constant(0, dtype=tf.int32, shape=match_results.shape))
    ignore_labels = tf.fill(match_results.shape, -1)

    return (ignore_labels + positive_labels + negative_labels,
            positive_labels, negative_labels)

  def label_anchors(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(self._anchors.boxes)

    # cls_targets, cls_weights, box_weights are not used
    _, _, box_targets, _, matches = self._target_assigner.assign(
        anchor_box_list, gt_box_list, gt_labels)

    # score_targets contains the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)

    # Unpack labels.
    score_targets_dict = self._anchors.unpack_labels(score_targets)
    box_targets_dict = self._anchors.unpack_labels(box_targets)

    return score_targets_dict, box_targets_dict
```
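Not part of this commit, but a minimal sketch of how the Anchors class above is typically exercised. The hyperparameters here (levels 2-6, one scale, three aspect ratios, anchor_scale 8, a 1024x1024 input) are common Mask R-CNN defaults and are assumptions rather than values taken from this repo's configs, and the snippet assumes the mask_rcnn.object_detection helpers imported above are on the Python path:

```
from mask_rcnn.anchors import Anchors

# Hypothetical hyperparameters; the repo's own training config may differ.
anchors = Anchors(min_level=2,
                  max_level=6,
                  num_scales=1,
                  aspect_ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                  anchor_scale=8.0,
                  image_size=(1024, 1024))

# All anchors stacked across levels: [total_anchors, 4] in (ymin, xmin, ymax, xmax).
print(anchors.boxes.shape)

# Per-level view: each level is reshaped to [H_l, W_l, anchors_per_location * 4].
for level, boxes in anchors.get_unpacked_boxes().items():
    print(level, boxes.shape)
```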
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/coco_metric.py
0 → 100644
View file @
c320b6ef
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
COCO API: github.com/cocodataset/cocoapi/
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
atexit
import
copy
import
tempfile
import
numpy
as
np
import
tensorflow
as
tf
from
mask_rcnn.utils.logging_formatter
import
logging
from
pycocotools.coco
import
COCO
from
pycocotools.cocoeval
import
COCOeval
import
pycocotools.mask
as
maskUtils
import
cv2
class
MaskCOCO
(
COCO
):
"""COCO object for mask evaluation.
"""
def
reset
(
self
,
dataset
):
"""Reset the dataset and groundtruth data index in this object.
Args:
dataset: dict of groundtruth data. It should has similar structure as the
COCO groundtruth JSON file. Must contains three keys: {'images',
'annotations', 'categories'}.
'images': list of image information dictionary. Required keys: 'id',
'width' and 'height'.
'annotations': list of dict. Bounding boxes and segmentations related
information. Required keys: {'id', 'image_id', 'category_id', 'bbox',
'iscrowd', 'area', 'segmentation'}.
'categories': list of dict of the category information.
Required key: 'id'.
Refer to http://cocodataset.org/#format-data for more details.
Raises:
AttributeError: If the dataset is empty or not a dict.
"""
assert
dataset
,
'Groundtruth should not be empty.'
assert
isinstance
(
dataset
,
dict
),
'annotation file format {} not supported'
.
format
(
type
(
dataset
))
self
.
anns
,
self
.
cats
,
self
.
imgs
=
dict
(),
dict
(),
dict
()
self
.
dataset
=
copy
.
deepcopy
(
dataset
)
self
.
createIndex
()
    def loadRes(self, detection_results, include_mask, is_image_mask=False):
        """Loads result file and returns a result api object.

        Args:
          detection_results: a dictionary containing prediction results.
          include_mask: a boolean, whether to include mask in detection results.
          is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

        Returns:
          res: result MaskCOCO api object
        """
        res = MaskCOCO()
        res.dataset['images'] = [img for img in self.dataset['images']]

        logging.info('Loading and preparing results...')
        predictions = self.load_predictions(
            detection_results,
            include_mask=include_mask,
            is_image_mask=is_image_mask
        )
        assert isinstance(predictions, list), 'results is not an array of objects'

        if predictions:
            image_ids = [pred['image_id'] for pred in predictions]
            assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
                'Results do not correspond to current coco set'

        if (predictions and 'bbox' in predictions[0] and predictions[0]['bbox']):
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

            for idx, pred in enumerate(predictions):
                bb = pred['bbox']
                x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]

                if 'segmentation' not in pred:
                    pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]

                pred['area'] = bb[2] * bb[3]
                pred['id'] = idx + 1
                pred['iscrowd'] = 0

        elif 'segmentation' in predictions[0]:
            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])

            for idx, pred in enumerate(predictions):
                # Currently only the compressed RLE format is supported as segmentation results.
                pred['area'] = maskUtils.area(pred['segmentation'])

                if 'bbox' not in pred:
                    pred['bbox'] = maskUtils.toBbox(pred['segmentation'])

                pred['id'] = idx + 1
                pred['iscrowd'] = 0

        res.dataset['annotations'] = predictions

        res.createIndex()
        return res
    def load_predictions(self, detection_results, include_mask, is_image_mask=False):
        """Creates a list of prediction dictionaries from detection and mask results.

        Args:
          detection_results: a dictionary containing numpy arrays which correspond
            to prediction results.
          include_mask: a boolean, whether to include mask in detection results.
          is_image_mask: a boolean, whether the predicted mask is a whole-image mask.

        Returns:
          a list of dictionaries holding the different prediction results from the model
          in numpy form.
        """
        predictions = []
        num_detections = detection_results['detection_scores'].size
        current_index = 0

        for i, image_id in enumerate(detection_results['source_id']):

            if include_mask:
                box_coorindates_in_image = detection_results['detection_boxes'][i]
                segments = generate_segmentation_from_masks(
                    detection_results['detection_masks'][i],
                    box_coorindates_in_image,
                    int(detection_results['image_info'][i][3]),
                    int(detection_results['image_info'][i][4]),
                    is_image_mask=is_image_mask
                )

                # Convert the mask to uint8 and then to fortranarray for the RLE encoder.
                encoded_masks = [
                    maskUtils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
                    for instance_mask in segments
                ]

            for box_index in range(int(detection_results['num_detections'][i])):
                if current_index % 1000 == 0:
                    logging.info('{}/{}'.format(current_index, num_detections))

                current_index += 1

                prediction = {
                    'image_id': int(image_id),
                    'bbox': detection_results['detection_boxes'][i][box_index].tolist(),
                    'score': detection_results['detection_scores'][i][box_index],
                    'category_id': int(detection_results['detection_classes'][i][box_index]),
                }

                if include_mask:
                    prediction['segmentation'] = encoded_masks[box_index]

                predictions.append(prediction)

        return predictions
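
# NOTE (illustration only, not part of the original file): a minimal sketch of the
# `detection_results` dictionary that MaskCOCO.load_predictions() above expects.
# Keys and shapes are inferred from the code; the placeholder values and the helper
# name below are assumptions, not canonical settings.
def _example_detection_results(batch_size=1, max_dets=100, mask_size=28):
    """Builds a zero-filled detection_results dict with the assumed layout."""
    return {
        'source_id': np.arange(batch_size),                                 # COCO image ids
        'num_detections': np.full([batch_size], max_dets),                  # valid detections per image
        'detection_boxes': np.zeros([batch_size, max_dets, 4]),             # [x, y, w, h] per box
        'detection_scores': np.zeros([batch_size, max_dets]),
        'detection_classes': np.ones([batch_size, max_dets]),
        'detection_masks': np.zeros([batch_size, max_dets, mask_size, mask_size]),  # only if include_mask
        'image_info': np.tile([1024., 1024., 1., 800., 800.], [batch_size, 1]),     # indices 3/4: original h/w
    }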
def generate_segmentation_from_masks(masks, detected_boxes, image_height, image_width,
                                     is_image_mask=False):
    """Generates segmentation results from instance masks.

    Args:
      masks: a numpy array of shape [N, mask_height, mask_width] representing the
        instance masks w.r.t. the `detected_boxes`.
      detected_boxes: a numpy array of shape [N, 4] representing the reference
        bounding boxes.
      image_height: an integer representing the height of the image.
      image_width: an integer representing the width of the image.
      is_image_mask: bool. True: input masks are whole-image masks. False: input
        masks are bounding-box level masks.

    Returns:
      segms: a numpy array of shape [N, image_height, image_width] representing
        the instance masks *pasted* on the image canvas.
    """

    def expand_boxes(boxes, scale):
        """Expands an array of boxes by a given scale."""
        # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227
        # The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
        # whereas `boxes` here is in [x1, y1, w, h] form.
        w_half = boxes[:, 2] * .5
        h_half = boxes[:, 3] * .5
        x_c = boxes[:, 0] + w_half
        y_c = boxes[:, 1] + h_half

        w_half *= scale
        h_half *= scale

        boxes_exp = np.zeros(boxes.shape)
        boxes_exp[:, 0] = x_c - w_half
        boxes_exp[:, 2] = x_c + w_half
        boxes_exp[:, 1] = y_c - h_half
        boxes_exp[:, 3] = y_c + h_half

        return boxes_exp

    # Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812
    # To work around an issue with cv2.resize (it seems to automatically pad
    # with repeated border values), we manually zero-pad the masks by 1 pixel
    # prior to resizing back to the original image resolution. This prevents
    # "top hat" artifacts. We therefore need to expand the reference boxes by an
    # appropriate factor.
    _, mask_height, mask_width = masks.shape
    scale = max((mask_width + 2.0) / mask_width, (mask_height + 2.0) / mask_height)

    ref_boxes = expand_boxes(detected_boxes, scale)
    ref_boxes = ref_boxes.astype(np.int32)
    padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)

    segms = []
    for mask_ind, mask in enumerate(masks):
        im_mask = np.zeros((image_height, image_width), dtype=np.uint8)

        if is_image_mask:
            # Process whole-image masks.
            im_mask[:, :] = mask[:, :]

        else:
            # Process masks inside bounding boxes.
            padded_mask[1:-1, 1:-1] = mask[:, :]

            ref_box = ref_boxes[mask_ind, :]

            w = ref_box[2] - ref_box[0] + 1
            h = ref_box[3] - ref_box[1] + 1

            w = np.maximum(w, 1)
            h = np.maximum(h, 1)

            mask = cv2.resize(padded_mask, (w, h))
            mask = np.array(mask > 0.5, dtype=np.uint8)

            x_0 = max(ref_box[0], 0)
            x_1 = min(ref_box[2] + 1, image_width)
            y_0 = max(ref_box[1], 0)
            y_1 = min(ref_box[3] + 1, image_height)

            im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]),
                                             (x_0 - ref_box[0]):(x_1 - ref_box[0])]

        segms.append(im_mask)

    segms = np.array(segms)
    assert masks.shape[0] == segms.shape[0]
    return segms
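
# NOTE (illustration only, not part of the original file): a quick sanity check of
# generate_segmentation_from_masks() under assumed inputs -- one 28x28 box-level mask
# pasted back onto a 100x120 canvas. Shapes and box values follow the docstring above
# and are placeholders.
def _example_paste_masks():
    """Illustrative only: pastes a single instance mask onto the image canvas."""
    masks = np.ones([1, 28, 28], dtype=np.float32)               # [N, mask_h, mask_w]
    boxes = np.array([[10., 20., 40., 30.]], dtype=np.float32)   # [x1, y1, w, h]
    segms = generate_segmentation_from_masks(masks, boxes, image_height=100, image_width=120)
    assert segms.shape == (1, 100, 120)
    return segms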
class EvaluationMetric(object):
    """COCO evaluation metric class."""

    def __init__(self, filename, include_mask):
        """Constructs the COCO evaluation class.

        The class provides the interface to metrics_fn in TPUEstimator. The
        _evaluate() loads a JSON file in COCO annotation format as the
        groundtruths and runs COCO evaluation.

        Args:
          filename: Ground truth JSON file name. If filename is None, use
            groundtruth data passed from the dataloader for evaluation.
          include_mask: boolean to indicate whether or not to include mask eval.
        """
        if filename:
            if filename.startswith('gs://'):
                _, local_val_json = tempfile.mkstemp(suffix='.json')
                tf.io.gfile.remove(local_val_json)

                tf.io.gfile.copy(filename, local_val_json)

                atexit.register(tf.io.gfile.remove, local_val_json)

            else:
                local_val_json = filename

            self.coco_gt = MaskCOCO(local_val_json)

        self.filename = filename
        self.metric_names = [
            'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl',
            'ARmax1', 'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl'
        ]
        self._include_mask = include_mask

        if self._include_mask:
            mask_metric_names = ['mask_' + x for x in self.metric_names]
            self.metric_names.extend(mask_metric_names)

        self._reset()

    def _reset(self):
        """Resets the COCO API object."""
        if self.filename is None and not hasattr(self, 'coco_gt'):
            self.coco_gt = MaskCOCO()

    def predict_metric_fn(self, predictions, is_predict_image_mask=False, groundtruth_data=None):
        """Generates COCO metrics."""
        image_ids = list(set(predictions['source_id']))

        if groundtruth_data is not None:
            self.coco_gt.reset(groundtruth_data)

        coco_dt = self.coco_gt.loadRes(
            predictions, self._include_mask, is_image_mask=is_predict_image_mask
        )

        coco_eval = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_metrics = coco_eval.stats

        if self._include_mask:
            # Create another object for instance segmentation metric evaluation.
            mcoco_eval = COCOeval(self.coco_gt, coco_dt, iouType='segm')
            mcoco_eval.params.imgIds = image_ids
            mcoco_eval.evaluate()
            mcoco_eval.accumulate()
            mcoco_eval.summarize()
            mask_coco_metrics = mcoco_eval.stats

        if self._include_mask:
            metrics = np.hstack((coco_metrics, mask_coco_metrics))
        else:
            metrics = coco_metrics

        # Clean up after evaluation is done.
        self._reset()

        metrics = metrics.astype(np.float32)

        metrics_dict = {}
        for i, name in enumerate(self.metric_names):
            metrics_dict[name] = metrics[i]

        return metrics_dict
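
# NOTE (illustration only, not part of the original file): a hypothetical end-to-end
# call of EvaluationMetric above. The annotation file name is a placeholder, and the
# `predictions` dict is assumed to follow the layout consumed by load_predictions().
def _example_evaluate(predictions, annotations_json='instances_val2017.json'):
    """Illustrative only: runs bbox + mask COCO evaluation on a predictions dict."""
    evaluator = EvaluationMetric(filename=annotations_json, include_mask=True)
    metrics = evaluator.predict_metric_fn(predictions)
    # 'AP' is the box mAP, 'mask_AP' the instance-segmentation mAP.
    return metrics['AP'], metrics['mask_AP']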
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and number of
positive examples to normalize the loss during training.
"""
import functools
import math
import multiprocessing

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn.utils.distributed_utils import MPI_is_distributed
from mask_rcnn.utils.distributed_utils import MPI_rank_and_size
from mask_rcnn.utils.distributed_utils import MPI_rank
from mask_rcnn.utils.distributed_utils import MPI_size

# common functions
from mask_rcnn.dataloader_utils import dataset_parser

from distutils.version import LooseVersion
class InputReader(object):
    """Input reader for dataset."""

    def __init__(
        self,
        file_pattern,
        mode=tf.estimator.ModeKeys.TRAIN,
        num_examples=0,
        use_fake_data=False,
        use_instance_mask=False,
        seed=None
    ):
        self._mode = mode
        self._file_pattern = file_pattern
        self._num_examples = num_examples
        self._use_fake_data = use_fake_data
        self._use_instance_mask = use_instance_mask
        self._seed = seed

    def _create_dataset_parser_fn(self, params):
        """Creates the parser for parsing input data (dictionary)."""
        return functools.partial(
            dataset_parser,
            mode=self._mode,
            params=params,
            use_instance_mask=self._use_instance_mask,
            seed=self._seed
        )
    def __call__(self, params, input_context=None):

        batch_size = params['batch_size'] if 'batch_size' in params else 1

        try:
            seed = params['seed'] if not MPI_is_distributed() else params['seed'] * MPI_rank()
        except (KeyError, TypeError):
            seed = None

        if MPI_is_distributed():
            n_gpus = MPI_size()
        elif input_context is not None:
            n_gpus = input_context.num_input_pipelines
        else:
            n_gpus = 1

        ##################################################

        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)

        if self._mode == tf.estimator.ModeKeys.TRAIN:

            if input_context is not None:
                logging.info("Using Dataset Sharding with TF Distributed")
                _num_shards = input_context.num_input_pipelines
                _shard_idx = input_context.input_pipeline_id

            elif MPI_is_distributed():
                logging.info("Using Dataset Sharding with Horovod")
                _shard_idx, _num_shards = MPI_rank_and_size()

            try:
                dataset = dataset.shard(num_shards=_num_shards, index=_shard_idx)
                dataset = dataset.shuffle(math.ceil(256 / _num_shards))

            except NameError:  # Not a distributed training setup
                pass

        def _prefetch_dataset(filename):
            return tf.data.TFRecordDataset(filename).prefetch(1)

        dataset = dataset.interleave(
            map_func=_prefetch_dataset,
            cycle_length=32,
            block_length=64,
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
        )

        if self._num_examples is not None and self._num_examples > 0:
            logging.info("[*] Limiting the amount of samples to: %d" % self._num_examples)
            dataset = dataset.take(self._num_examples)

        dataset = dataset.cache()

        if self._mode == tf.estimator.ModeKeys.TRAIN:

            dataset = dataset.shuffle(
                buffer_size=4096,
                reshuffle_each_iteration=True,
                seed=seed
            )

            dataset = dataset.repeat()

        # Parse the fetched records to input tensors for the model function.
        dataset = dataset.map(
            map_func=self._create_dataset_parser_fn(params),
            num_parallel_calls=tf.data.experimental.AUTOTUNE,
        )

        dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset which always loops at the
            # first batch. This reduces variance in performance and is useful in
            # testing.
            logging.info("Using Fake Dataset Loop...")
            dataset = dataset.take(1).cache().repeat()

            if self._mode != tf.estimator.ModeKeys.TRAIN:
                dataset = dataset.take(int(5000 / batch_size))

        dataset = dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE,
        )

        if self._mode == tf.estimator.ModeKeys.PREDICT or n_gpus > 1:

            if not tf.distribute.has_strategy():
                dataset = dataset.apply(
                    tf.data.experimental.prefetch_to_device(
                        '/gpu:0',  # With Horovod the local GPU is always 0
                        buffer_size=1,
                    )
                )

        data_options = tf.data.Options()

        data_options.experimental_deterministic = seed is not None
        if LooseVersion(tf.__version__) <= LooseVersion("2.0.0"):
            data_options.experimental_distribute.auto_shard = False
        else:
            data_options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
            # data_options.experimental_distribute.auto_shard = False

        data_options.experimental_slack = True

        data_options.experimental_threading.max_intra_op_parallelism = 1
        # data_options.experimental_threading.private_threadpool_size = int(multiprocessing.cpu_count() / n_gpus) * 2

        # ================= experimental_optimization ================= #

        data_options.experimental_optimization.apply_default_optimizations = False

        # data_options.experimental_optimization.autotune = True
        data_options.experimental_optimization.filter_fusion = True
        data_options.experimental_optimization.map_and_batch_fusion = True
        data_options.experimental_optimization.map_and_filter_fusion = True
        data_options.experimental_optimization.map_fusion = True
        data_options.experimental_optimization.map_parallelization = True

        map_vectorization_options = tf.data.experimental.MapVectorizationOptions()
        map_vectorization_options.enabled = True
        map_vectorization_options.use_choose_fastest = True

        data_options.experimental_optimization.map_vectorization = map_vectorization_options

        data_options.experimental_optimization.noop_elimination = True
        data_options.experimental_optimization.parallel_batch = True
        data_options.experimental_optimization.shuffle_and_repeat_fusion = True

        # ========== Stats on TF Data =============
        # aggregator = tf.data.experimental.StatsAggregator()
        # data_options.experimental_stats.aggregator = aggregator
        # data_options.experimental_stats.latency_all_edges = True

        dataset = dataset.with_options(data_options)

        return dataset
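
# NOTE (illustration only, not part of the original file): in TF2 eager mode the reader
# above can be consumed directly as a tf.data iterable; `params` is assumed to carry the
# same keys as the params dictionary used in the benchmark block below.
def _example_train_iteration(data_dir, params):
    """Illustrative only: fetches one parsed, batched training example."""
    import os

    train_input = InputReader(
        file_pattern=os.path.join(data_dir, "train*.tfrecord"),
        mode=tf.estimator.ModeKeys.TRAIN,
        use_instance_mask=True
    )
    for features, labels in train_input(params).take(1):
        return features["images"].shape, sorted(labels.keys())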
if __name__ == "__main__":
    '''
    Data Loading Benchmark Usage:

    # Real Data - Training
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=2 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --training

    # Real Data - Inference
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=8 \
        --warmup_steps=200 \
        --benchmark_steps=2000

    # --------------- #

    # Synthetic Data - Training
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=2 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --training \
        --use_synthetic_data

    # Synthetic Data - Inference
    python -m mask_rcnn.dataloader \
        --data_dir="/data/" \
        --batch_size=8 \
        --warmup_steps=200 \
        --benchmark_steps=2000 \
        --use_synthetic_data

    # --------------- #
    '''
    import os
    import time
    import argparse

    import numpy as np

    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    tf.compat.v1.disable_eager_execution()
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    logging.set_verbosity(logging.INFO)

    parser = argparse.ArgumentParser(description="MaskRCNN Dataloader Benchmark")

    parser.add_argument(
        '--data_dir',
        required=True,
        type=str,
        help="Directory path which contains the preprocessed COCO dataset in TFRecord format"
    )

    parser.add_argument(
        '--batch_size',
        default=64,
        type=int,
        required=True,
        help="""Batch size used to measure performance."""
    )

    parser.add_argument(
        '--warmup_steps',
        default=200,
        type=int,
        required=True,
        help="""Number of steps considered as warmup and not taken into account for performance measurements."""
    )

    parser.add_argument(
        '--benchmark_steps',
        default=200,
        type=int,
        required=True,
        help="Number of steps used to benchmark dataloading performance. Only used in training."
    )

    parser.add_argument(
        '--seed',
        default=666,
        type=int,
        required=False,
        help="""Reproducibility Seed."""
    )

    parser.add_argument(
        "--training",
        default=False,
        action="store_true",
        help="Benchmark in training mode"
    )

    parser.add_argument(
        "--use_synthetic_data",
        default=False,
        action="store_true",
        help="Use synthetic dataset"
    )

    FLAGS, unknown_args = parser.parse_known_args()

    if len(unknown_args) > 0:

        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)

        raise ValueError("Invalid command line arg(s)")

    BURNIN_STEPS = FLAGS.warmup_steps

    if FLAGS.training:
        TOTAL_STEPS = FLAGS.warmup_steps + FLAGS.benchmark_steps
    else:
        TOTAL_STEPS = int(1e6)  # Wait for end of dataset

    if FLAGS.training:
        input_dataset = InputReader(
            file_pattern=os.path.join(FLAGS.data_dir, "train*.tfrecord"),
            mode=tf.estimator.ModeKeys.TRAIN,
            use_fake_data=FLAGS.use_synthetic_data,
            use_instance_mask=True,
            seed=FLAGS.seed
        )

    else:
        input_dataset = InputReader(
            file_pattern=os.path.join(FLAGS.data_dir, "val*.tfrecord"),
            mode=tf.estimator.ModeKeys.PREDICT,
            num_examples=5000,
            use_fake_data=FLAGS.use_synthetic_data,
            use_instance_mask=True,
            seed=FLAGS.seed
        )

    logging.info("[*] Executing Benchmark in %s mode" % ("training" if FLAGS.training else "inference"))
    logging.info("[*] Benchmark using %s data" % ("synthetic" if FLAGS.use_synthetic_data else "real"))

    time.sleep(1)

    # Build the data input.
    dataset = input_dataset(
        params={
            "anchor_scale": 8.0,
            "aspect_ratios": [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
            "batch_size": FLAGS.batch_size,
            "gt_mask_size": 112,
            "image_size": [1024, 1024],
            "include_groundtruth_in_features": False,
            "augment_input_data": True,
            "max_level": 6,
            "min_level": 2,
            "num_classes": 91,
            "num_scales": 1,
            "rpn_batch_size_per_im": 256,
            "rpn_fg_fraction": 0.5,
            "rpn_min_size": 0.,
            "rpn_nms_threshold": 0.7,
            "rpn_negative_overlap": 0.3,
            "rpn_positive_overlap": 0.7,
            "rpn_post_nms_topn": 1000,
            "rpn_pre_nms_topn": 2000,
            "skip_crowd_during_training": True,
            "use_category": True,
            "visualize_images_summary": False,
        }
    )

    dataset_iterator = dataset.make_initializable_iterator()

    if FLAGS.training:
        X, Y = dataset_iterator.get_next()
    else:
        X = dataset_iterator.get_next()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = False

    with tf.device("gpu:0"):

        X_gpu_ops = list()
        Y_gpu_ops = list()

        if FLAGS.training:
            for _, _x in X.items():
                X_gpu_ops.append(tf.identity(_x))

            for _, _y in Y.items():
                Y_gpu_ops.append(tf.identity(_y))

        else:
            for _, _x in X["features"].items():
                X_gpu_ops.append(tf.identity(_x))

        with tf.control_dependencies(X_gpu_ops + Y_gpu_ops):
            input_op = tf.constant(1.0)

    with tf.compat.v1.Session(config=config) as sess:

        sess.run(dataset_iterator.initializer)
        sess.run(tf.compat.v1.global_variables_initializer())

        total_files_processed = 0

        img_per_sec_arr = []
        processing_time_arr = []

        processing_start_time = time.time()

        for step in range(TOTAL_STEPS):

            try:
                start_time = time.time()
                sess.run(input_op)
                elapsed_time = (time.time() - start_time) * 1000

                imgs_per_sec = (FLAGS.batch_size / elapsed_time) * 1000
                total_files_processed += FLAGS.batch_size

                if (step + 1) > BURNIN_STEPS:
                    processing_time_arr.append(elapsed_time)
                    img_per_sec_arr.append(imgs_per_sec)

                if (step + 1) % 20 == 0 or (step + 1) == TOTAL_STEPS:
                    print(
                        "[STEP %04d] # Batch Size: %03d - Time: %03d msecs - Speed: %6d img/s" %
                        (step + 1, FLAGS.batch_size, elapsed_time, imgs_per_sec)
                    )

            except tf.errors.OutOfRangeError:
                break

        processing_time = time.time() - processing_start_time

        avg_processing_speed = np.mean(img_per_sec_arr)

        print("\n###################################################################")
        print("*** Data Loading Performance Metrics ***\n")
        print("\t=> Number of Steps: %d" % (step + 1))
        print("\t=> Batch Size: %d" % FLAGS.batch_size)
        print("\t=> Files Processed: %d" % total_files_processed)
        print("\t=> Total Execution Time: %d secs" % processing_time)
        print("\t=> Median Time per step: %3d msecs" % np.median(processing_time_arr))
        print("\t=> Median Processing Speed: %d images/secs" % np.median(img_per_sec_arr))
        print("\t=> Median Processing Time: %.2f msecs/image" % (1 / float(np.median(img_per_sec_arr)) * 1000))
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/dataloader_utils.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data loader and processing.
Defines input_fn of Mask-RCNN for TF Estimator. The input_fn includes training
data for category classification, bounding box regression, and number of
positive examples to normalize the loss during training.
"""
import tensorflow as tf

from mask_rcnn import anchors

from mask_rcnn.utils import coco_utils

from mask_rcnn.ops import preprocess_ops
from mask_rcnn.object_detection import tf_example_decoder

MAX_NUM_INSTANCES = 100
MAX_NUM_VERTICES_PER_INSTANCE = 1500
MAX_NUM_POLYGON_LIST_LEN = 2 * MAX_NUM_VERTICES_PER_INSTANCE * MAX_NUM_INSTANCES
POLYGON_PAD_VALUE = coco_utils.POLYGON_PAD_VALUE

__all__ = [
    # dataset parser
    "dataset_parser",

    # common functions
    "preprocess_image",
    "process_groundtruth_is_crowd",
    "process_source_id",

    # eval
    "prepare_labels_for_eval",

    # training
    "augment_image",
    "process_boxes_classes_indices_for_training",
    "process_gt_masks_for_training",
    "process_labels_for_training",
    "process_targets_for_training"
]
###############################################################################################################


def dataset_parser(value, mode, params, use_instance_mask, seed=None, regenerate_source_id=False):
    """Parses data to a fixed dimension input image and learning targets.

    Args:
      value: A dictionary that contains an image and groundtruth annotations.
      mode: a TF Estimator mode key (TRAIN, EVAL or PREDICT).
      params: a dictionary of dataset and model parameters.
      use_instance_mask: whether to decode and process instance masks.
      seed: an optional seed used for input augmentation.
      regenerate_source_id: `bool`, if True TFExampleParser will use the hashed
        value of `image/encoded` for `image/source_id`.

    Returns:
      features: a dictionary that contains the image and auxiliary
        information. The following describes {key: value} pairs in the
        dictionary.
        image: an image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        image_info: image information that includes the original height and
          width, the scale of the processed image to the original image, and
          the scaled height and width.
        source_ids: Source image id. Default value -1 if the source id is
          empty in the groundtruth annotation.
      labels: a dictionary that contains auxiliary information plus (optional)
        labels. The following describes {key: value} pairs in the dictionary.
        `labels` is only for training.
        score_targets_dict: an ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of the objectness score at the l-th level.
        box_targets_dict: an ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of the bounding box regression output
          at the l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
          fixed dimension [MAX_NUM_INSTANCES, 4].
        gt_classes: Groundtruth class annotations. The tensor is padded
          with -1 to the fixed dimension [MAX_NUM_INSTANCES].
        cropped_gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by params['gt_mask_size']
    """
    if mode not in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT, tf.estimator.ModeKeys.EVAL]:
        raise ValueError("Unknown execution mode received: %s" % mode)

    def create_example_decoder():
        return tf_example_decoder.TfExampleDecoder(
            use_instance_mask=use_instance_mask,
            regenerate_source_id=regenerate_source_id
        )

    example_decoder = create_example_decoder()

    with tf.xla.experimental.jit_scope(compile_ops=True):

        with tf.name_scope('parser'):

            data = example_decoder.decode(value)

            data['groundtruth_is_crowd'] = process_groundtruth_is_crowd(data)

            image = tf.image.convert_image_dtype(data['image'], dtype=tf.float32)

            source_id = process_source_id(data['source_id'])

            if mode == tf.estimator.ModeKeys.PREDICT:

                features = {
                    'source_ids': source_id,
                }

                if params['visualize_images_summary']:
                    features['orig_images'] = tf.image.resize(image, params['image_size'])

                features["images"], features["image_info"], _, _ = preprocess_image(
                    image,
                    boxes=None,
                    instance_masks=None,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=False,
                    seed=seed
                )

                if params['include_groundtruth_in_features']:
                    labels = prepare_labels_for_eval(
                        data,
                        target_num_instances=MAX_NUM_INSTANCES,
                        target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
                        use_instance_mask=params['include_mask']
                    )
                    return {'features': features, 'labels': labels}

                else:
                    return {'features': features}

            elif mode == tf.estimator.ModeKeys.TRAIN:

                labels = {}
                features = {'source_ids': source_id}

                boxes, classes, indices, instance_masks = process_boxes_classes_indices_for_training(
                    data,
                    skip_crowd_during_training=params['skip_crowd_during_training'],
                    use_category=params['use_category'],
                    use_instance_mask=use_instance_mask
                )

                image, image_info, boxes, instance_masks = preprocess_image(
                    image,
                    boxes=boxes,
                    instance_masks=instance_masks,
                    image_size=params['image_size'],
                    max_level=params['max_level'],
                    augment_input_data=params['augment_input_data'],
                    seed=seed
                )

                features.update({
                    'images': image,
                    'image_info': image_info,
                })

                padded_image_size = image.get_shape().as_list()[:2]

                # Pads cropped_gt_masks.
                if use_instance_mask:
                    labels['cropped_gt_masks'] = process_gt_masks_for_training(
                        instance_masks,
                        boxes,
                        gt_mask_size=params['gt_mask_size'],
                        padded_image_size=padded_image_size,
                        max_num_instances=MAX_NUM_INSTANCES
                    )

                with tf.xla.experimental.jit_scope(compile_ops=False):
                    # Assign anchors.
                    (score_targets, box_targets), input_anchor = process_targets_for_training(
                        padded_image_size=padded_image_size,
                        boxes=boxes,
                        classes=classes,
                        params=params
                    )

                additional_labels = process_labels_for_training(
                    image_info, boxes, classes, score_targets, box_targets,
                    max_num_instances=MAX_NUM_INSTANCES,
                    min_level=params["min_level"],
                    max_level=params["max_level"]
                )

                labels.update(additional_labels)

                # labels["input_anchor"] = input_anchor

                # Features
                # {
                #     'source_ids': <tf.Tensor 'parser/StringToNumber:0' shape=() dtype=float32>,
                #     'images': <tf.Tensor 'parser/pad_to_bounding_box/Squeeze:0' shape=(1024, 1024, 3) dtype=float32>,
                #     'image_info': <tf.Tensor 'parser/stack_1:0' shape=(5,) dtype=float32>
                # }
                FAKE_FEATURES = False

                if FAKE_FEATURES:
                    labels["source_ids"] = tf.ones(shape=(), dtype=tf.float32)
                    labels["images"] = tf.ones(shape=(1024, 1024, 3), dtype=tf.float32)
                    labels["image_info"] = tf.ones(shape=(5,), dtype=tf.float32)

                # Labels
                # {
                #     'cropped_gt_masks': <tf.Tensor 'parser/Reshape_4:0' shape=(100, 116, 116) dtype=float32>,
                #     'score_targets_2': <tf.Tensor 'parser/Reshape_9:0' shape=(256, 256, 3) dtype=int32>,
                #     'box_targets_2': <tf.Tensor 'parser/Reshape_14:0' shape=(256, 256, 12) dtype=float32>,
                #     'score_targets_3': <tf.Tensor 'parser/Reshape_10:0' shape=(128, 128, 3) dtype=int32>,
                #     'box_targets_3': <tf.Tensor 'parser/Reshape_15:0' shape=(128, 128, 12) dtype=float32>,
                #     'score_targets_4': <tf.Tensor 'parser/Reshape_11:0' shape=(64, 64, 3) dtype=int32>,
                #     'box_targets_4': <tf.Tensor 'parser/Reshape_16:0' shape=(64, 64, 12) dtype=float32>,
                #     'score_targets_5': <tf.Tensor 'parser/Reshape_12:0' shape=(32, 32, 3) dtype=int32>,
                #     'box_targets_5': <tf.Tensor 'parser/Reshape_17:0' shape=(32, 32, 12) dtype=float32>,
                #     'score_targets_6': <tf.Tensor 'parser/Reshape_13:0' shape=(16, 16, 3) dtype=int32>,
                #     'box_targets_6': <tf.Tensor 'parser/Reshape_18:0' shape=(16, 16, 12) dtype=float32>,
                #     'gt_boxes': <tf.Tensor 'parser/Reshape_20:0' shape=(100, 4) dtype=float32>,
                #     'gt_classes': <tf.Tensor 'parser/Reshape_22:0' shape=(100, 1) dtype=float32>
                # }
                FAKE_LABELS = False

                if FAKE_LABELS:
                    labels["cropped_gt_masks"] = tf.ones(shape=(100, 116, 116), dtype=tf.float32)
                    labels["gt_boxes"] = tf.ones(shape=(100, 4), dtype=tf.float32)
                    labels["gt_classes"] = tf.ones(shape=(100, 1), dtype=tf.float32)

                    idx = 1
                    for dim in [256, 128, 64, 32, 16]:
                        idx += 1  # Starts at 2
                        labels["score_targets_%d" % idx] = tf.ones(shape=(dim, dim, 3), dtype=tf.float32)
                        labels["box_targets_%d" % idx] = tf.ones(shape=(dim, dim, 12), dtype=tf.float32)

                return features, labels
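
# NOTE (illustration only, not part of the original file): dataset_parser() is meant to
# be mapped over a TFRecordDataset, as InputReader does in mask_rcnn/dataloader.py.
# `params` is assumed to carry the same keys as the benchmark block there, and TF2 eager
# execution is assumed for the final `next(iter(...))`.
def _example_parse_one_record(tfrecord_path, params):
    """Illustrative only: parses a single serialized tf.Example for training."""
    raw = tf.data.TFRecordDataset([tfrecord_path])
    parsed = raw.map(
        lambda value: dataset_parser(
            value, mode=tf.estimator.ModeKeys.TRAIN, params=params, use_instance_mask=True
        )
    )
    return next(iter(parsed))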
###############################################################################################################

# common functions


def preprocess_image(image, boxes, instance_masks, image_size, max_level, augment_input_data=False, seed=None):

    image = preprocess_ops.normalize_image(image)

    if augment_input_data:
        image, boxes, instance_masks = augment_image(
            image=image,
            boxes=boxes,
            instance_masks=instance_masks,
            seed=seed
        )

    # Scaling and padding.
    image, image_info, boxes, instance_masks = preprocess_ops.resize_and_pad(
        image=image,
        target_size=image_size,
        stride=2 ** max_level,
        boxes=boxes,
        masks=instance_masks
    )

    return image, image_info, boxes, instance_masks


def process_groundtruth_is_crowd(data):
    return tf.cond(
        pred=tf.greater(tf.size(input=data['groundtruth_is_crowd']), 0),
        true_fn=lambda: data['groundtruth_is_crowd'],
        false_fn=lambda: tf.zeros_like(data['groundtruth_classes'], dtype=tf.bool)
    )
# def process_source_id(data):
#     source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1', source_id)
#     source_id = tf.strings.to_number(source_id)
#     return source_id


def process_source_id(source_id):
    """Processes source_id to the right format."""
    if source_id.dtype == tf.string:
        source_id = tf.cast(tf.strings.to_number(source_id), tf.int64)

    with tf.control_dependencies([source_id]):
        source_id = tf.cond(
            tf.equal(tf.size(source_id), 0),
            lambda: tf.cast(tf.constant(-1), tf.int64),
            lambda: tf.identity(source_id)
        )

    return source_id
# eval
def prepare_labels_for_eval(
    data,
    target_num_instances=MAX_NUM_INSTANCES,
    target_polygon_list_len=MAX_NUM_POLYGON_LIST_LEN,
    use_instance_mask=False
):
    """Creates the labels dict for infeed from data of a tf.Example."""

    image = data['image']

    height, width = tf.shape(input=image)[:2]

    boxes = data['groundtruth_boxes']

    classes = tf.cast(data['groundtruth_classes'], dtype=tf.float32)

    num_labels = tf.shape(input=classes)[0]

    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [target_num_instances, 4])
    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [target_num_instances, 1])

    is_crowd = tf.cast(data['groundtruth_is_crowd'], dtype=tf.float32)
    is_crowd = preprocess_ops.pad_to_fixed_size(is_crowd, 0, [target_num_instances, 1])

    labels = dict()

    labels['width'] = width
    labels['height'] = height
    labels['groundtruth_boxes'] = boxes
    labels['groundtruth_classes'] = classes
    labels['num_groundtruth_labels'] = num_labels
    labels['groundtruth_is_crowd'] = is_crowd

    if use_instance_mask:
        data['groundtruth_polygons'] = preprocess_ops.pad_to_fixed_size(
            data=data['groundtruth_polygons'],
            pad_value=POLYGON_PAD_VALUE,
            output_shape=[target_polygon_list_len, 1]
        )

        if 'groundtruth_area' in data:
            # Note: padding the groundtruth area from `data`; the original referenced
            # labels['groundtruth_area'], which is not set at this point.
            labels['groundtruth_area'] = preprocess_ops.pad_to_fixed_size(
                data=data['groundtruth_area'],
                pad_value=0,
                output_shape=[target_num_instances, 1]
            )

    return labels
# training
def augment_image(image, boxes, instance_masks, seed):

    flipped_results = preprocess_ops.random_horizontal_flip(
        image,
        boxes=boxes,
        masks=instance_masks,
        seed=seed
    )

    if instance_masks is not None:
        image, boxes, instance_masks = flipped_results

    else:
        image, boxes = flipped_results

    # image = tf.image.random_brightness(image, max_delta=0.1, seed=seed)
    # image = tf.image.random_contrast(image, lower=0.9, upper=1.1, seed=seed)
    # image = tf.image.random_saturation(image, lower=0.9, upper=1.1, seed=seed)
    # image = tf.image.random_jpeg_quality(image, min_jpeg_quality=80, max_jpeg_quality=100, seed=seed)

    return image, boxes, instance_masks
def process_boxes_classes_indices_for_training(data, skip_crowd_during_training, use_category, use_instance_mask):

    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])

    indices = None
    instance_masks = None

    if not use_category:
        classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    if skip_crowd_during_training:
        indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))

        classes = tf.gather_nd(classes, indices)
        boxes = tf.gather_nd(boxes, indices)

        if use_instance_mask:
            instance_masks = tf.gather_nd(data['groundtruth_instance_masks'], indices)

    return boxes, classes, indices, instance_masks
def process_gt_masks_for_training(instance_masks, boxes, gt_mask_size, padded_image_size, max_num_instances):

    cropped_gt_masks = preprocess_ops.crop_gt_masks(
        instance_masks=instance_masks,
        boxes=boxes,
        gt_mask_size=gt_mask_size,
        image_size=padded_image_size
    )

    # cropped_gt_masks = tf.reshape(cropped_gt_masks, [max_num_instances, -1])

    cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
        data=cropped_gt_masks,
        pad_value=-1,
        output_shape=[max_num_instances, (gt_mask_size + 4) ** 2]
    )

    return tf.reshape(cropped_gt_masks, [max_num_instances, gt_mask_size + 4, gt_mask_size + 4])
def process_labels_for_training(
    image_info, boxes, classes, score_targets, box_targets,
    max_num_instances, min_level, max_level
):
    labels = {}

    # Pad groundtruth data.
    # boxes *= image_info[2]
    boxes = preprocess_ops.pad_to_fixed_size(boxes, -1, [max_num_instances, 4])
    classes = preprocess_ops.pad_to_fixed_size(classes, -1, [max_num_instances, 1])

    for level in range(min_level, max_level + 1):
        labels['score_targets_%d' % level] = score_targets[level]
        labels['box_targets_%d' % level] = box_targets[level]

    labels['gt_boxes'] = boxes
    labels['gt_classes'] = classes

    return labels
def process_targets_for_training(padded_image_size, boxes, classes, params):

    input_anchors = anchors.Anchors(
        params['min_level'],
        params['max_level'],
        params['num_scales'],
        params['aspect_ratios'],
        params['anchor_scale'],
        padded_image_size
    )

    anchor_labeler = anchors.AnchorLabeler(
        input_anchors,
        params['num_classes'],
        params['rpn_positive_overlap'],
        params['rpn_negative_overlap'],
        params['rpn_batch_size_per_im'],
        params['rpn_fg_fraction']
    )

    return anchor_labeler.label_anchors(boxes, classes), input_anchors
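
# NOTE (illustration only, not part of the original file): a minimal sketch of
# process_targets_for_training(). The parameter values mirror the benchmark block in
# mask_rcnn/dataloader.py and are assumptions, not canonical settings; `boxes` and
# `classes` are tensors as produced by the training parser above.
def _example_anchor_targets(boxes, classes):
    """Illustrative only: builds RPN score/box targets for a padded 1024x1024 image."""
    params = {
        'min_level': 2, 'max_level': 6, 'num_scales': 1,
        'aspect_ratios': [[1.0, 1.0], [1.4, 0.7], [0.7, 1.4]],
        'anchor_scale': 8.0, 'num_classes': 91,
        'rpn_positive_overlap': 0.7, 'rpn_negative_overlap': 0.3,
        'rpn_batch_size_per_im': 256, 'rpn_fg_fraction': 0.5,
    }
    (score_targets, box_targets), _ = process_targets_for_training(
        padded_image_size=[1024, 1024], boxes=boxes, classes=classes, params=params
    )
    # One entry per FPN level, keyed min_level..max_level.
    return score_targets, box_targets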