## Contributing
The best way to contribute to MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started).
Generally we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too.
To get started contributing code, you or your organization needs to sign the MLCommons CLA found at the [MLC policies page](https://mlcommons.org/en/policies/). Once you or your organization has signed the corporate CLA, please fill out this [CLA sign-up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get your specific GitHub handle authorized so that you can start contributing code under the proper license.
MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes; a typical fork-based workflow is sketched below. Ensure that the cla-bot and other checks pass for your pull requests.
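A minimal sketch of that fork-based workflow, assuming the contribution targets the mlcommons/inference repository (the handle, branch, and commit message are placeholders):
```
# Fork the repository on GitHub first, then:
git clone https://github.com/<your-github-handle>/inference.git
cd inference
git checkout -b my-change          # work on a topic branch
# ... edit files ...
git commit -am "Describe the change"
git push origin my-change
# Finally, open a pull request against mlcommons/inference on GitHub.
```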
vars = {
# Pull in chromium build files and tools for multi-platform build support.
'chromium_git': 'https://chromium.googlesource.com/chromium/src',
'mlpth_root': 'src',
}
deps = {
'{mlpth_root}/build': {
'url': '{chromium_git}/build@e3ed5e43c305b353b49e08ac69e7f4d1c2d88ad2'
},
'{mlpth_root}/buildtools': {
'url': '{chromium_git}/buildtools@106e9fce3799633f42b45ca8bbe9e84e1e235603'
},
'{mlpth_root}/tools/clang': {
'url': '{chromium_git}/tools/clang.git@3114fbc11f9644c54dd0a4cdbfa867bac50ff983',
},
'{mlpth_root}/third_party/pybind': {
'url': 'https://github.com/pybind/pybind11.git@v2.2',
},
}
recursedeps = [
'{mlpth_root}/buildtools',
]
hooks = [
# Pull clang-format binaries using checked-in hashes.
{
'name': 'clang_format_win',
'pattern': '.',
'condition': 'host_os == "win"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=win32',
'--no_auth',
'--bucket', 'chromium-clang-format',
'-s', '{mlpth_root}/buildtools/win/clang-format.exe.sha1',
],
},
{
'name': 'clang_format_mac',
'pattern': '.',
'condition': 'host_os == "mac"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=darwin',
'--no_auth',
'--bucket', 'chromium-clang-format',
'-s', '{mlpth_root}/buildtools/mac/clang-format.sha1',
],
},
{
'name': 'clang_format_linux',
'pattern': '.',
'condition': 'host_os == "linux"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=linux*',
'--no_auth',
'--bucket', 'chromium-clang-format',
'-s', '{mlpth_root}/buildtools/linux64/clang-format.sha1',
],
},
# Pull GN using checked-in hashes.
{
'name': 'gn_win',
'pattern': '.',
'condition': 'host_os == "win"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=win32',
'--no_auth',
'--bucket', 'chromium-gn',
'-s', '{mlpth_root}/buildtools/win/gn.exe.sha1',
],
},
{
'name': 'gn_mac',
'pattern': '.',
'condition': 'host_os == "mac"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=darwin',
'--no_auth',
'--bucket', 'chromium-gn',
'-s', '{mlpth_root}/buildtools/mac/gn.sha1',
],
},
{
'name': 'gn_linux',
'pattern': '.',
'condition': 'host_os == "linux"',
'action': [ 'download_from_google_storage',
'--no_resume',
'--platform=linux*',
'--no_auth',
'--bucket', 'chromium-gn',
'-s', '{mlpth_root}/buildtools/linux64/gn.sha1',
],
},
# Pull sysroots.
{
'name': 'sysroot_arm',
'pattern': '.',
'condition': '(checkout_linux and checkout_arm)',
'action': ['python', '{mlpth_root}/build/linux/sysroot_scripts/install-sysroot.py',
'--arch=arm'],
},
{
'name': 'sysroot_arm64',
'pattern': '.',
'condition': '(checkout_linux and checkout_arm64)',
'action': ['python', '{mlpth_root}/build/linux/sysroot_scripts/install-sysroot.py',
'--arch=arm64'],
},
{
'name': 'sysroot_x86',
'pattern': '.',
'condition': '(checkout_linux and (checkout_x86 or checkout_x64))',
'action': ['python', '{mlpth_root}/build/linux/sysroot_scripts/install-sysroot.py',
'--arch=x86'],
},
{
'name': 'sysroot_mips',
'pattern': '.',
'condition': '(checkout_linux and checkout_mips)',
'action': ['python', '{mlpth_root}/build/linux/sysroot_scripts/install-sysroot.py',
'--arch=mips'],
},
{
'name': 'sysroot_x64',
'pattern': '.',
'condition': 'checkout_linux and checkout_x64',
'action': ['python', '{mlpth_root}/build/linux/sysroot_scripts/install-sysroot.py',
'--arch=x64'],
},
{
# Update the Windows toolchain if necessary.
'name': 'win_toolchain',
'pattern': '.',
'condition': 'checkout_win',
'action': ['python', '{mlpth_root}/build/vs_toolchain.py', 'update'],
},
{
'name': 'fuchsia_sdk',
'pattern': '.',
'condition': 'checkout_fuchsia',
'action': [
'python',
'{mlpth_root}/build/fuchsia/update_sdk.py',
],
},
{
# Note: On Win, this should run after win_toolchain, as it may use it.
'name': 'clang',
'pattern': '.',
# clang not supported on aix
'condition': 'host_os != "aix"',
'action': ['python', '{mlpth_root}/tools/clang/scripts/update.py'],
},
]
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
### Retinanet-pytorch inference
##### 1. Modify the configuration
Configure the following paths (adjust them to your actual setup):
Dataset: the open-images-v6-mlperf validation set
e.g. /public/opendas/DL_DATA/open-images-v6-mlperf/validation
Open-source model weights file: resnext50_32x4d_fpn.pth
Edit these in the launch script /retinanet_infer_pytorch/vision/classification_and_detection/retinanet_acc.sh, for example as in the sketch below.
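A rough sketch of the kind of path edits expected in retinanet_acc.sh; the variable names here are illustrative assumptions, not necessarily the script's actual names:
```
# Illustrative only -- point the script at your local dataset and checkpoint
DATASET_PATH=/public/opendas/DL_DATA/open-images-v6-mlperf/validation   # open-images-v6-mlperf validation set
MODEL_PATH=/path/to/resnext50_32x4d_fpn.pth                             # open-source RetinaNet weights
```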
##### 2. Run inference
```
cd /retinanet_infer_pytorch/vision/classification_and_detection/
chmod +x retinanet_acc.sh
bash retinanet_acc.sh
```
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# End to End MLPerf Submission example\n",
"\n",
"This is following the [General MLPerf Submission Rules](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc).\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get the MLPerf Inference Benchmark Suite source code\n",
"\n",
"You run this notebook from the root of the 'mlcommons/inference' repo that you cloned with\n",
"```\n",
"git clone --recurse-submodules https://github.com/mlcommons/inference.git --depth 1\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Build loadgen"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# build loadgen\n",
"!pip install pybind11\n",
"!cd loadgen; CFLAGS=\"-std=c++14 -O3\" python setup.py develop"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!cd vision/classification_and_detection; python setup.py develop"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set Working Directory"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%cd vision/classification_and_detection"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Download data\n",
"\n",
"For this example, the ImageNet and/or COCO validation data should already be on the host system. See the [MLPerf Image Classification task](https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection#datasets) for more details on obtaining this. For the following step each validation dataset is stored in /workspace/data/. You should change this to the location in your setup."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"\n",
"mkdir data\n",
"ln -s /workspace/data/imagenet2012 data/\n",
"ln -s /workspace/data/coco data/"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"\n",
"mkdir models\n",
"\n",
"# resnet50\n",
"wget -q https://zenodo.org/record/2535873/files/resnet50_v1.pb -O models/resnet50_v1.pb \n",
"wget -q https://zenodo.org/record/2592612/files/resnet50_v1.onnx -O models/resnet50_v1.onnx\n",
"\n",
"# ssd-mobilenet\n",
"wget -q http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz -O models/ssd_mobilenet_v1_coco_2018_01_28.tar.gz\n",
"tar zxvf ./models/ssd_mobilenet_v1_coco_2018_01_28.tar.gz -C ./models; mv models/ssd_mobilenet_v1_coco_2018_01_28/frozen_inference_graph.pb ./models/ssd_mobilenet_v1_coco_2018_01_28.pb\n",
"wget -q https://zenodo.org/record/3163026/files/ssd_mobilenet_v1_coco_2018_01_28.onnx -O models/ssd_mobilenet_v1_coco_2018_01_28.onnx \n",
"\n",
"# ssd-resnet34\n",
"wget -q https://zenodo.org/record/3345892/files/tf_ssd_resnet34_22.1.zip -O models/tf_ssd_resnet34_22.1.zip\n",
"unzip ./models/tf_ssd_resnet34_22.1.zip -d ./models; mv models/tf_ssd_resnet34_22.1/resnet34_tf.22.1.pb ./models\n",
"wget -q https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx -O models/resnet34-ssd1200.onnx"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Run benchmarks using the reference implementation\n",
"\n",
"Lets prepare a submission for ResNet-50 on a cloud datacenter server with a NVIDIA T4 GPU using TensorFlow. \n",
"\n",
"The following script will run those combinations and prepare a submission directory, following the general submission rules documented [here](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.CRITICAL)\n",
"\n",
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' \n",
"os.environ['CUDA_VISIBLE_DEVICES'] = \"0\"\n",
"\n",
"# final results go here\n",
"ORG = \"mlperf-org\"\n",
"DIVISION = \"closed\"\n",
"SUBMISSION_ROOT = \"/tmp/mlperf-submission\"\n",
"SUBMISSION_DIR = os.path.join(SUBMISSION_ROOT, DIVISION, ORG)\n",
"os.environ['SUBMISSION_ROOT'] = SUBMISSION_ROOT\n",
"os.environ['SUBMISSION_DIR'] = SUBMISSION_DIR\n",
"os.makedirs(SUBMISSION_DIR, exist_ok=True)\n",
"os.makedirs(os.path.join(SUBMISSION_DIR, \"measurements\"), exist_ok=True)\n",
"os.makedirs(os.path.join(SUBMISSION_DIR, \"code\"), exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"====== resnet50/SingleStream =====\n",
"TestScenario.SingleStream qps=7322.28, mean=0.0078, time=6.828, acc=76.456%, queries=50000, tiles=50.0:0.0077,80.0:0.0078,90.0:0.0078,95.0:0.0079,99.0:0.0131,99.9:0.0135\n",
"accuracy=76.456%, good=38228, total=50000\n",
"TestScenario.SingleStream qps=125.88, mean=0.0079, time=600.138, queries=75546, tiles=50.0:0.0079,80.0:0.0080,90.0:0.0080,95.0:0.0081,99.0:0.0081,99.9:0.0082\n",
"====== resnet50/Server =====\n",
"TestScenario.Server qps=7528.79, mean=0.0832, time=6.641, acc=76.456%, queries=50000, tiles=50.0:0.0809,80.0:0.0922,90.0:0.0932,95.0:0.0941,99.0:0.0963,99.9:0.1022\n",
"accuracy=76.456%, good=38228, total=50000\n",
"TestScenario.Server qps=128.84, mean=116.7138, time=2098.285, queries=270336, tiles=50.0:115.9511,80.0:185.2868,90.0:209.0362,95.0:220.8464,99.0:230.0520,99.9:231.5965\n",
"====== resnet50/Offline =====\n",
"TestScenario.Offline qps=2008.52, mean=0.3050, time=3.112, acc=76.456%, queries=6250, tiles=50.0:0.3017,80.0:0.3416,90.0:0.3465,95.0:0.3525,99.0:0.3646,99.9:1.2464\n",
"accuracy=76.456%, good=38228, total=50000\n",
"TestScenario.Offline qps=285.33, mean=1157.2775, time=2313.086, queries=660000, tiles=50.0:1157.2701,80.0:1850.5871,90.0:2081.7068,95.0:2197.3040,99.0:2289.7431,99.9:2310.5646\n",
"====== resnet50/MultiStream =====\n",
"TestScenario.MultiStream qps=1891.35, mean=0.0879, time=3.357, acc=76.447%, queries=6350, tiles=50.0:0.1002,80.0:0.1265,90.0:0.1311,95.0:0.1321,99.0:0.1356,99.9:0.1422\n",
"accuracy=76.456%, good=38228, total=50000\n",
"TestScenario.MultiStream qps=266.63, mean=0.0904, time=40555.550, queries=10813440, tiles=50.0:0.1050,80.0:0.1289,90.0:0.1369,95.0:0.1376,99.0:0.1386,99.9:0.1399\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:main:Namespace(accuracy=True, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=0.0005, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='SingleStream', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=419.7sec\n",
"INFO:main:starting TestScenario.SingleStream\n",
"INFO:main:Namespace(accuracy=False, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=0.0005, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/performance/run_1', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='SingleStream', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.SingleStream\n",
"INFO:main:Namespace(accuracy=True, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='Server', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.Server\n",
"INFO:main:Namespace(accuracy=False, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/performance/run_1', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='Server', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.Server\n",
"INFO:main:Namespace(accuracy=True, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy', outputs=['ArgMax:0'], profile='resnet50-tf', qps=1000, samples_per_query=40, scenario='Offline', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.Offline\n",
"INFO:main:Namespace(accuracy=False, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/performance/run_1', outputs=['ArgMax:0'], profile='resnet50-tf', qps=1000, samples_per_query=40, scenario='Offline', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.Offline\n",
"INFO:main:Namespace(accuracy=True, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='MultiStream', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.MultiStream\n",
"INFO:main:Namespace(accuracy=False, backend='tensorflow', cache=0, count=None, data_format=None, dataset='imagenet', dataset_list=None, dataset_path='/workspace/inference/vision/classification_and_detection/data/imagenet2012', find_peak_performance=False, inputs=['input_tensor:0'], max_batchsize=8, max_latency=None, mlperf_conf='../../mlperf.conf', model='/workspace/inference/vision/classification_and_detection/models/resnet50_v1.pb', model_name='resnet50', output='/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/performance/run_1', outputs=['ArgMax:0'], profile='resnet50-tf', qps=145, samples_per_query=40, scenario='MultiStream', threads=2, time=None, user_conf='user.conf')\n",
"INFO:imagenet:loaded 50000 images, cache=0, took=1.1sec\n",
"INFO:main:starting TestScenario.MultiStream\n"
]
}
],
"source": [
"%%bash\n",
"\n",
"# where to find stuff\n",
"export DATA_ROOT=`pwd`/data\n",
"export MODEL_DIR=`pwd`/models\n",
"\n",
"# options for official runs\n",
"gopt=\"--max-batchsize 8 --samples-per-query 40 --threads 2 --qps 145\"\n",
"\n",
"\n",
"function one_run {\n",
" # args: mode count framework device model ...\n",
" scenario=$1; shift\n",
" count=$1; shift\n",
" framework=$1\n",
" device=$2\n",
" model=$3\n",
" system_id=$framework-$device\n",
" echo \"====== $model/$scenario =====\"\n",
"\n",
" case $model in \n",
" resnet50) \n",
" cmd=\"tools/accuracy-imagenet.py --imagenet-val-file $DATA_ROOT/imagenet2012/val_map.txt\"\n",
" offical_name=\"resnet\";;\n",
" ssd-mobilenet) \n",
" cmd=\"tools/accuracy-coco.py --coco-dir $DATA_ROOT/coco\"\n",
" offical_name=\"ssd-small\";;\n",
" ssd-resnet34) \n",
" cmd=\"tools/accuracy-coco.py --coco-dir $DATA_ROOT/coco\"\n",
" offical_name=\"ssd-large\";;\n",
" esac\n",
" output_dir=$SUBMISSION_DIR/results/$system_id/$offical_name\n",
" \n",
" # accuracy run\n",
" ./run_local.sh $@ --scenario $scenario --accuracy --output $output_dir/$scenario/accuracy\n",
" python $cmd --mlperf-accuracy-file $output_dir/$scenario/accuracy/mlperf_log_accuracy.json \\\n",
" > $output_dir/$scenario/accuracy/accuracy.txt\n",
" cat $output_dir/$scenario/accuracy/accuracy.txt\n",
"\n",
" # performance run\n",
" cnt=0\n",
" while [ $cnt -lt $count ]; do\n",
" let cnt=cnt+1\n",
" ./run_local.sh $@ --scenario $scenario --output $output_dir/$scenario/performance/run_$cnt\n",
" done\n",
" \n",
" # setup the measurements directory\n",
" mdir=$SUBMISSION_DIR/measurements/$system_id/$offical_name/$scenario\n",
" mkdir -p $mdir\n",
" cp ../../mlperf.conf $mdir\n",
"\n",
" # reference app uses command line instead of user.conf\n",
" echo \"# empty\" > $mdir/user.conf\n",
" touch $mdir/README.md\n",
" impid=\"reference\"\n",
" cat > $mdir/$system_id\"_\"$impid\"_\"$scenario\".json\" <<EOF\n",
" {\n",
" \"input_data_types\": \"fp32\",\n",
" \"retraining\": \"none\",\n",
" \"starting_weights_filename\": \"https://zenodo.org/record/2535873/files/resnet50_v1.pb\",\n",
" \"weight_data_types\": \"fp32\",\n",
" \"weight_transformations\": \"none\"\n",
" }\n",
"EOF\n",
"}\n",
"\n",
"function one_model {\n",
" # args: framework device model ...\n",
" one_run SingleStream 1 $@ --max-latency 0.0005\n",
" one_run Server 1 $@\n",
" one_run Offline 1 $@ --qps 1000\n",
" one_run MultiStream 1 $@\n",
"}\n",
"\n",
"\n",
"# run image classifier benchmarks \n",
"export DATA_DIR=$DATA_ROOT/imagenet2012\n",
"one_model tf gpu resnet50 $gopt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"There might be large trace files in the submission directory - we can delete them."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"!find {SUBMISSION_DIR}/ -name mlperf_log_trace.json -delete"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Complete submission directory\n",
"\n",
"Add the required meta data to the submission."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"\n",
"#\n",
"# setup systems directory\n",
"#\n",
"if [ ! -d ${SUBMISSION_DIR}/systems ]; then\n",
" mkdir ${SUBMISSION_DIR}/systems\n",
"fi\n",
"\n",
"cat > ${SUBMISSION_DIR}/systems/tf-gpu.json <<EOF\n",
"{\n",
" \"division\": \"closed\",\n",
" \"status\": \"available\",\n",
" \"submitter\": \"mlperf-org\",\n",
" \"system_name\": \"tf-gpu\",\n",
" \"system_type\": \"datacenter\",\n",
" \n",
" \"number_of_nodes\": 1,\n",
" \"host_memory_capacity\": \"32GB\",\n",
" \"host_processor_core_count\": 1,\n",
" \"host_processor_frequency\": \"3.50GHz\",\n",
" \"host_processor_model_name\": \"Intel(R) Xeon(R) CPU E5-1620 v3 @ 3.50GHz\",\n",
" \"host_processors_per_node\": 1,\n",
" \"host_storage_capacity\": \"512GB\",\n",
" \"host_storage_type\": \"SSD\",\n",
" \n",
" \"accelerator_frequency\": \"-\",\n",
" \"accelerator_host_interconnect\": \"-\",\n",
" \"accelerator_interconnect\": \"-\",\n",
" \"accelerator_interconnect_topology\": \"-\",\n",
" \"accelerator_memory_capacity\": \"16GB\",\n",
" \"accelerator_memory_configuration\": \"none\",\n",
" \"accelerator_model_name\": \"T4\",\n",
" \"accelerator_on-chip_memories\": \"-\",\n",
" \"accelerators_per_node\": 1,\n",
"\n",
" \"framework\": \"v1.14.0-rc1-22-gaf24dc9\",\n",
" \"operating_system\": \"ubuntu-18.04\",\n",
" \"other_software_stack\": \"cuda-11.2\",\n",
" \"sw_notes\": \"\"\n",
"}\n",
"EOF"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"\n",
"#\n",
"# setup code directory\n",
"#\n",
"dir=${SUBMISSION_DIR}/code/resnet/reference\n",
"mkdir -p $dir\n",
"echo \"git clone https://github.com/mlcommons/inference.git\" > $dir/VERSION.txt\n",
"git rev-parse HEAD >> $dir/VERSION.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What's in the submission directory now ?\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/tmp/mlperf-submission/closed/mlperf-org/systems/tf-gpu.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Offline/user.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Offline/mlperf.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Offline/README.md\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Offline/tf-gpu_reference_Offline.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/SingleStream/user.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/SingleStream/mlperf.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/SingleStream/tf-gpu_reference_SingleStream.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/SingleStream/README.md\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/MultiStream/user.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/MultiStream/mlperf.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/MultiStream/tf-gpu_reference_MultiStream.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/MultiStream/README.md\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Server/user.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Server/mlperf.conf\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Server/README.md\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/measurements/tf-gpu/resnet/Server/tf-gpu_reference_Server.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/performance/run_1/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/performance/run_1/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/performance/run_1/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/performance/run_1/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy/accuracy.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Offline/accuracy/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/performance/run_1/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/performance/run_1/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/performance/run_1/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/performance/run_1/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy/accuracy.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/SingleStream/accuracy/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/performance/run_1/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/performance/run_1/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/performance/run_1/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/performance/run_1/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy/accuracy.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/MultiStream/accuracy/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/performance/run_1/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/performance/run_1/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/performance/run_1/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/performance/run_1/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy/results.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy/accuracy.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy/mlperf_log_summary.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy/mlperf_log_detail.txt\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/results/tf-gpu/resnet/Server/accuracy/mlperf_log_accuracy.json\r\n",
"/tmp/mlperf-submission/closed/mlperf-org/code/resnet/reference/VERSION.txt\r\n"
]
}
],
"source": [
"!find {SUBMISSION_ROOT}/ -type f"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If we look at some files:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-- SingleStream Accuracy\n",
"accuracy=76.456%, good=38228, total=50000\n",
"\n",
"-- SingleStream Summary\n",
"================================================\n",
"MLPerf Results Summary\n",
"================================================\n",
"SUT name : PySUT\n",
"Scenario : SingleStream\n",
"Mode : PerformanceOnly\n",
"90th percentile latency (ns) : 8030958\n",
"Result is : VALID\n",
" Min duration satisfied : Yes\n",
" Min queries satisfied : Yes\n",
"\n",
"-- Server Summary\n",
"================================================\n",
"MLPerf Results Summary\n",
"================================================\n",
"SUT name : PySUT\n",
"Scenario : Server\n",
"Mode : PerformanceOnly\n",
"Scheduled samples per second : 144.87\n",
"Result is : INVALID\n",
" Performance constraints satisfied : NO\n",
" Min duration satisfied : Yes\n"
]
}
],
"source": [
"!echo \"-- SingleStream Accuracy\"; head {SUBMISSION_DIR}/results/tf-gpu/resnet/SingleStream/accuracy/accuracy.txt\n",
"!echo \"\\n-- SingleStream Summary\"; head {SUBMISSION_DIR}/results/tf-gpu/resnet/SingleStream/performance/run_1/mlperf_log_summary.txt\n",
"!echo \"\\n-- Server Summary\"; head {SUBMISSION_DIR}/results/tf-gpu/resnet/Server/performance/run_1/mlperf_log_summary.txt"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run the submission checker\n",
"\n",
"Finally, run the submission checker tool that does some sanity checking on your submission.\n",
"We run it at the end and attach the output to the submission."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!python ../../tools/submission/submission-checker.py --input {SUBMISSION_ROOT} > {SUBMISSION_DIR}/submission-checker.log 2>&1 \n",
"!cat {SUBMISSION_DIR}/submission-checker.log"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
## Submission Rules
The MLPerf inference submission rules are spread between the [MLCommons policies](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc) and the [MLCommons Inference policies](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc) documents. Further, the rules related to power submissions are given [here](https://github.com/mlcommons/inference_policies/blob/master/power_measurement.adoc). The points below summarize the official rules and serve as a checklist for the submitter; please see the original rules for any clarification.
## Hardware requirements
1. MLCommons inference results can be submitted on any hardware; past results range from a Raspberry Pi to high-end inference servers.
2. A closed-division submission in the datacenter category needs **ECC RAM** and must also have the **networking** capabilities detailed [here](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#networking-from-the-v30-round).
3. Power submissions need an [approved power analyzer](https://github.com/mlcommons/inference_policies/blob/master/power_measurement.adoc#74-which-power-analyzers-aka-meters-are-supported).
## Things to Know
1. A closed submission needs performance and accuracy runs for all the required scenarios (per the edge/datacenter category), with accuracy within 99% (or 99.9%, where required) of the reference model, as given in the respective task READMEs. Further, the model weights must not be altered except for quantization. If any of these constraints are not met, the submission cannot go into the closed division but can still be submitted to the open division.
2. Reference models are mostly fp32, and the reference implementations are provided only as a guide; they are not optimized for performance and are not meant to be used directly by submitters.
3. The calibration document is due **one week** before the submission deadline.
4. A power submission needs a power analyzer approved by SPEC Power and a signed EULA to get access to the SPEC PTDaemon.
5. To submit under the `available` category, your submission system must be available (in whole or in parts, and either publicly or to customers) and the software used must be either open source or an **official or beta release** as of the submission deadline. Submissions using a nightly release, for example, cannot be submitted under the available category.
### Is there an automatic way to run the MLPerf inference benchmarks?
MLPerf inference submissions are expected to be run on various hardware and supported software stacks. Therefore, MLCommons provides only reference implementations to guide submitters in creating optimal implementations for their specific software and hardware configurations. Additionally, all implementations used for MLPerf inference submissions are available in the MLCommons [Inference results](https://github.com/orgs/mlcommons/repositories?q=inference_results_v+sort%3Aname) repositories (under `closed/<submitter>/code` directory), offering further guidance for submitters developing their own implementations.
### Expected time to do benchmark runs
1. A closed submission in the datacenter category needs Offline and Server scenario runs, with a minimum of ten minutes for each.
2. A closed submission in the edge category needs SingleStream, MultiStream (only for ResNet-50 and RetinaNet), and Offline scenario runs, with a minimum of ten minutes for each.
3. Further, two (three for ResNet-50) compliance runs are needed for the closed division, each taking at least ten minutes per scenario.
4. The SingleStream, MultiStream, and Server scenarios use early stopping and so can always finish in around ten minutes.
5. The Offline scenario needs a minimum of 24,576 input queries to be processed; this can take hours for low-throughput models like 3dunet, LLMs, etc.
6. The open division has no accuracy constraints and no required compliance runs, and can be submitted for any single scenario. There is no constraint on the model used, except that the model accuracy must be validated on the accuracy dataset used in the corresponding MLPerf inference task [or must be preapproved](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#412-relaxed-constraints-for-the-open-division).
7. A power submission needs an extra ranging run to determine the peak current usage, and this often doubles the overall experiment run time. If this overhead is too much, the ranging run can be reduced to a 5-minute run using mechanisms like [this](https://github.com/mlcommons/cm4mlops/blob/main/script/benchmark-program-mlperf/customize.py#L18).
## Validity of the submission
1. The [MLCommons Inference submission checker](https://github.com/mlcommons/inference/blob/master/tools/submission/submission_checker.py) is provided to ensure that all submissions pass the required checks (see the example invocation below).
2. In the unlikely event that the submission checker itself fails on your submission, please raise a GitHub issue [here](https://github.com/mlcommons/inference/issues).
3. Any submission passing the submission checker is eligible for the review discussions, but submitters are still required to answer any queries and fix any issues reported by other submitters.
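For reference, a minimal invocation of the checker from the root of the inference repository looks something like the following (the log file name is illustrative):
```
python3 tools/submission/submission_checker.py --input <path to your submission root> 2>&1 | tee submission-checker.log
```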
### Reviewing other submissions
1. Ensure that the `system_desc_id.json` file has meaningful entries; `submission_checker` only checks for the existence of the fields.
2. For power submissions, `power settings` and `analyzer table` files are to be submitted, and even though the submission checker checks for the existence of these files, the content of [these files](https://github.com/mlcommons/inference_policies/blob/master/power_measurement.adoc#64-power-management-settings) must be checked manually for validity.
3. README files in the submission directory must be checked to make sure that the instructions are reproducible.
4. For closed datacenter submissions, the [ECC RAM and networking requirements](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#constraints-for-the-closed-division) must be verified.
5. The submission checker might report warnings, and some of these warnings can warrant an answer from the submitter.
## Changes from MLCommons Inference 4.0
1. One new benchmark in the datacenter category: Mixtral-8x7B. No changes in the edge category.
2. For power submissions, there is no code change.
## Reference implementation for the automotive 3D detection benchmark
## Dataset and model checkpoints
Contact MLCommons support for access to the Waymo Open Dataset along with the model checkpoints for the reference implementation. You will need to accept a license agreement and will be given directions to download the data. Place the kitti_format folder under a directory named waymo (see the layout sketch below). There are four checkpoints in total: two for PyTorch and two for ONNX.
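Given the docker mount and the `--dataset-path` used below, the expected directory layout is roughly the following (the parent directory name is up to you):
```
<path to waymo dataset>/
└── waymo/
    └── kitti_format/    # downloaded Waymo data in KITTI format goes here
```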
## Running with docker
Build the container and mount the inference repo and Waymo dataset directory.
```
docker build -t auto_inference -f dockerfile.gpu .
docker run --gpus=all -it -v <directory to inference repo>/inference/:/inference -v <path to waymo dataset>/waymo:/waymo --rm auto_inference
```
### Run with GPU
```
cd /inference/automotive/3d-object-detection
python main.py --dataset waymo --dataset-path /waymo/kitti_format/ --lidar-path <checkpoint_path>/pp_ep36.pth --segmentor-path <checkpoint_path>/best_deeplabv3plus_resnet50_waymo_os16.pth --mlperf_conf /inference/mlperf.conf
```
### Run with CPU and ONNX
```
python main.py --dataset waymo --dataset-path /waymo/kitti_format/ --lidar-path <checkpoint_path>/pp.onnx --segmentor-path <checkpoint_path>/deeplabv3+.onnx --mlperf_conf /inference/mlperf.conf
```
### Run the accuracy checker
```
python accuracy_waymo.py --mlperf-accuracy-file <path to accuracy file>/mlperf_log_accuracy.json --waymo-dir /waymo/kitti_format/
```
"""
Tool to calculate accuracy for loadgen accuracy output found in mlperf_log_accuracy.json
We assume that loadgen's query index corresponds to the sample order
in the Waymo validation set.
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import os
import numpy as np
from waymo import Waymo
from tools.evaluate import do_eval
# pylint: disable=missing-docstring
CLASSES = Waymo.CLASSES
LABEL2CLASSES = {v: k for k, v in CLASSES.items()}
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--mlperf-accuracy-file",
required=True,
help="path to mlperf_log_accuracy.json")
parser.add_argument(
"--waymo-dir",
required=True,
help="waymo dataset directory")
parser.add_argument(
"--verbose",
action="store_true",
help="verbose messages")
parser.add_argument(
"--output-file",
default="openimages-results.json",
help="path to output file")
parser.add_argument(
"--use-inv-map",
action="store_true",
help="use inverse label map")
args = parser.parse_args()
return args
def main():
args = get_args()
with open(args.mlperf_accuracy_file, "r") as f:
results = json.load(f)
detections = {}
image_ids = set()
seen = set()
no_results = 0
val_dataset = Waymo(
data_root=args.waymo_dir,
split='val',
painted=True,
cam_sync=False)
for j in results:
idx = j['qsl_idx']
# de-dupe in case loadgen sends the same image multiple times
if idx in seen:
continue
seen.add(idx)
# reconstruct from mlperf accuracy log
# what is written by the benchmark is an array of float32's, 14 per detection:
# dimensions (3), location (3), rotation_y, bbox (4), label, score, image_idx
data = np.frombuffer(bytes.fromhex(j['data']), np.float32)
for i in range(0, len(data), 14):
dimension = [float(x) for x in data[i:i + 3]]
location = [float(x) for x in data[i + 3:i + 6]]
rotation_y = float(data[i + 6])
bbox = [float(x) for x in data[i + 7:i + 11]]
label = int(data[i + 11])
score = float(data[i + 12])
image_idx = int(data[i + 13])
if image_idx not in detections:
detections[image_idx] = {
'name': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'bbox': [],
'score': []
}
if dimension[0] > 0:
detections[image_idx]['name'].append(LABEL2CLASSES[label])
detections[image_idx]['dimensions'].append(dimension)
detections[image_idx]['location'].append(location)
detections[image_idx]['rotation_y'].append(rotation_y)
detections[image_idx]['bbox'].append(bbox)
detections[image_idx]['score'].append(score)
image_ids.add(image_idx)
with open(args.output_file, "w") as fp:
json.dump(detections, fp, sort_keys=True, indent=4)
format_results = {}
for key in detections.keys():
format_results[key] = {k: np.array(v)
for k, v in detections[key].items()}
map_stats = do_eval(
format_results,
val_dataset.data_infos,
CLASSES,
cam_sync=False)
map_stats['Total'] = np.mean(list(map_stats.values()))
print(map_stats)
if args.verbose:
print("found {} results".format(len(results)))
print("found {} images".format(len(image_ids)))
print("found {} images with no results".format(no_results))
print("ignored {} dupes".format(len(results) - len(seen)))
if __name__ == "__main__":
main()
"""
abstract backend class
"""
class Backend:
def __init__(self):
self.inputs = []
self.outputs = []
def version(self):
raise NotImplementedError("Backend:version")
def name(self):
raise NotImplementedError("Backend:name")
def load(self, model_path, inputs=None, outputs=None):
raise NotImplementedError("Backend:load")
def predict(self, feed):
raise NotImplementedError("Backend:predict")
import torch
import backend
class BackendDebug(backend.Backend):
def __init__(self, image_size=[3, 1024, 1024], **kwargs):
super(BackendDebug, self).__init__()
self.image_size = image_size
def version(self):
return torch.__version__
def name(self):
return "debug-SUT"
def image_format(self):
return "NCHW"
def load(self):
return self
def predict(self, prompts):
images = []
return images
from typing import Optional, List, Union
import os
import torch
import logging
import backend
from collections import namedtuple
from model.painter import Painter
from model.pointpillars import PointPillars
import numpy as np
from tools.process import keep_bbox_from_image_range
from waymo import Waymo
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("backend-pytorch")
def change_calib_device(calib, cuda):
result = {}
if cuda:
device = 'cuda'
else:
device = 'cpu'
result['R0_rect'] = calib['R0_rect'].to(device=device, dtype=torch.float)
for i in range(5):
result['P' + str(i)] = calib['P' + str(i)
].to(device=device, dtype=torch.float)
result['Tr_velo_to_cam_' +
str(i)] = calib['Tr_velo_to_cam_' +
str(i)].to(device=device, dtype=torch.float)
return result
class BackendDeploy(backend.Backend):
def __init__(
self,
segmentor_path,
lidar_detector_path,
data_path
):
super(BackendDeploy, self).__init__()
self.segmentor_path = segmentor_path
self.lidar_detector_path = lidar_detector_path
# self.segmentation_classes = 18
self.detection_classes = 3
self.data_root = data_path
CLASSES = Waymo.CLASSES
self.LABEL2CLASSES = {v: k for k, v in CLASSES.items()}
def version(self):
return torch.__version__
def name(self):
return "python-SUT"
def load(self):
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
PaintArgs = namedtuple(
'PaintArgs', [
'training_path', 'model_path', 'cam_sync'])
painting_args = PaintArgs(
os.path.join(
self.data_root,
'training'),
self.segmentor_path,
False)
self.painter = Painter(painting_args)
self.segmentor = self.painter.model
model = PointPillars(
nclasses=self.detection_classes,
painted=True).to(
device=device)
model.eval()
checkpoint = torch.load(self.lidar_detector_path)
model.load_state_dict(checkpoint["model_state_dict"])
self.lidar_detector = model
return self
def predict(self, inputs):
dimensions, locations, rotation_y, box2d, class_labels, class_scores, ids = [
], [], [], [], [], [], []
with torch.inference_mode():
device = torch.device(
"cuda:0" if torch.cuda.is_available() else "cpu")
model_input = inputs[0]
batched_pts = model_input['pts']
scores_from_cam = []
for i in range(len(model_input['images'])):
segmentation_score = self.segmentor(
model_input['images'][i].to(device))[0]
scores_from_cam.append(
self.painter.get_score(segmentation_score).cpu())
points = self.painter.augment_lidar_class_scores_both(
scores_from_cam, batched_pts, model_input['calib_info'])
batch_results = self.lidar_detector(
batched_pts=[points.to(device=device)], mode='val')
for j, result in enumerate(batch_results):
format_result = {
'class': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': [],
'idx': -1
}
calib_info = model_input['calib_info']
image_info = model_input['image_info']
idx = model_input['image_info']['image_idx']
format_result['idx'] = idx
calib_info = change_calib_device(calib_info, False)
result_filter = keep_bbox_from_image_range(
result, calib_info, 5, image_info, False)
lidar_bboxes = result_filter['lidar_bboxes']
labels, scores = result_filter['labels'], result_filter['scores']
bboxes2d, camera_bboxes = result_filter['bboxes2d'], result_filter['camera_bboxes']
for lidar_bbox, label, score, bbox2d, camera_bbox in \
zip(lidar_bboxes, labels, scores, bboxes2d, camera_bboxes):
format_result['class'].append(label.item())
format_result['truncated'].append(0.0)
format_result['occluded'].append(0)
alpha = camera_bbox[6] - \
np.arctan2(camera_bbox[0], camera_bbox[2])
format_result['alpha'].append(alpha.item())
format_result['bbox'].append(bbox2d.tolist())
format_result['dimensions'].append(camera_bbox[3:6])
format_result['location'].append(camera_bbox[:3])
format_result['rotation_y'].append(camera_bbox[6].item())
format_result['score'].append(score.item())
if len(format_result['dimensions']) > 0:
format_result['dimensions'] = torch.stack(
format_result['dimensions'])
format_result['location'] = torch.stack(
format_result['location'])
dimensions.append(format_result['dimensions'])
locations.append(format_result['location'])
rotation_y.append(format_result['rotation_y'])
class_labels.append(format_result['class'])
class_scores.append(format_result['score'])
box2d.append(format_result['bbox'])
ids.append(format_result['idx'])
return dimensions, locations, rotation_y, box2d, class_labels, class_scores, ids
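
# Usage sketch (illustrative, not part of the benchmark harness): a minimal,
# hedged example of how main.py drives this backend. The checkpoint and dataset
# paths below are hypothetical placeholders; the sample dict normally comes from
# waymo.Waymo via dataset.preprocess.
if __name__ == "__main__":
    sut = BackendDeploy(
        segmentor_path="/path/to/deeplabv3plus.pth",      # hypothetical path
        lidar_detector_path="/path/to/pointpillars.pth",  # hypothetical path
        data_path="/path/to/waymo_data_root",             # hypothetical path
    )
    sut.load()
    log.info("backend %s loaded, framework version %s", sut.name(), sut.version())
    # predict() expects a one-element list holding a dict with 'pts', 'images',
    # 'calib_info' and 'image_info' (see waymo.Waymo / dataset.preprocess):
    # dims, locs, rot_y, box2d, labels, scores, ids = sut.predict([sample])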
from typing import Optional, List, Union
import os
import torch
import logging
import backend
from collections import namedtuple
from model.painter import Painter
from model.pointpillars_core import PointPillarsPre, PointPillarsPos
import numpy as np
from tools.process import keep_bbox_from_image_range
from waymo import Waymo
import onnxruntime as ort
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("backend-onnx")
def change_calib_device(calib, cuda):
    result = {}
    device = 'cuda' if cuda else 'cpu'
    result['R0_rect'] = calib['R0_rect'].to(device=device, dtype=torch.float)
    for i in range(5):
        result['P' + str(i)] = calib['P' + str(i)].to(device=device, dtype=torch.float)
        result['Tr_velo_to_cam_' + str(i)] = calib[
            'Tr_velo_to_cam_' + str(i)].to(device=device, dtype=torch.float)
    return result
def to_numpy(tensor):
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
class BackendOnnx(backend.Backend):
def __init__(
self,
segmentor_path,
lidar_detector_path,
data_path
):
super(BackendOnnx, self).__init__()
self.segmentor_path = segmentor_path
self.lidar_detector_path = lidar_detector_path
# self.segmentation_classes = 18
self.detection_classes = 3
self.data_root = data_path
CLASSES = Waymo.CLASSES
self.LABEL2CLASSES = {v: k for k, v in CLASSES.items()}
def version(self):
return torch.__version__
def name(self):
return "python-SUT"
def load(self):
device = torch.device("cpu")
PaintArgs = namedtuple(
'PaintArgs', [
'training_path', 'model_path', 'cam_sync'])
painting_args = PaintArgs(
os.path.join(
self.data_root,
'training'),
self.segmentor_path,
False)
self.painter = Painter(painting_args, onnx=True)
self.segmentor = self.painter.model
model_pre = PointPillarsPre()
model_post = PointPillarsPos(self.detection_classes)
model_pre.eval()
model_post.eval()
ort_sess = ort.InferenceSession(self.lidar_detector_path)
self.lidar_detector = ort_sess
self.model_pre = model_pre
self.model_post = model_post
return self
def predict(self, inputs):
        dimensions, locations, rotation_y, box2d = [], [], [], []
        class_labels, class_scores, ids = [], [], []
with torch.inference_mode():
model_input = inputs[0]
batched_pts = model_input['pts']
scores_from_cam = []
for i in range(len(model_input['images'])):
input_image_name = self.segmentor.get_inputs()[0].name
input_data = {
input_image_name: to_numpy(
model_input['images'][i])}
segmentation_score = self.segmentor.run(None, input_data)
segmentation_score = [
torch.from_numpy(item) for item in segmentation_score]
scores_from_cam.append(
self.painter.get_score(
segmentation_score[0].squeeze(0)).cpu())
points = self.painter.augment_lidar_class_scores_both(
scores_from_cam, batched_pts, model_input['calib_info'])
pillars, coors_batch, npoints_per_pillar = self.model_pre(batched_pts=[
points])
input_pillars_name = self.lidar_detector.get_inputs()[0].name
input_coors_batch_name = self.lidar_detector.get_inputs()[1].name
input_npoints_per_pillar_name = self.lidar_detector.get_inputs()[
2].name
input_data = {input_pillars_name: to_numpy(pillars),
input_coors_batch_name: to_numpy(coors_batch),
input_npoints_per_pillar_name: to_numpy(npoints_per_pillar)}
result = self.lidar_detector.run(None, input_data)
result = [torch.from_numpy(item) for item in result]
batch_results = self.model_post(result)
for j, result in enumerate(batch_results):
format_result = {
'class': [],
'truncated': [],
'occluded': [],
'alpha': [],
'bbox': [],
'dimensions': [],
'location': [],
'rotation_y': [],
'score': [],
'idx': -1
}
calib_info = model_input['calib_info']
image_info = model_input['image_info']
idx = model_input['image_info']['image_idx']
format_result['idx'] = idx
calib_info = change_calib_device(calib_info, False)
result_filter = keep_bbox_from_image_range(
result, calib_info, 5, image_info, False)
lidar_bboxes = result_filter['lidar_bboxes']
labels, scores = result_filter['labels'], result_filter['scores']
bboxes2d, camera_bboxes = result_filter['bboxes2d'], result_filter['camera_bboxes']
for lidar_bbox, label, score, bbox2d, camera_bbox in \
zip(lidar_bboxes, labels, scores, bboxes2d, camera_bboxes):
format_result['class'].append(label.item())
format_result['truncated'].append(0.0)
format_result['occluded'].append(0)
alpha = camera_bbox[6] - \
np.arctan2(camera_bbox[0], camera_bbox[2])
format_result['alpha'].append(alpha.item())
format_result['bbox'].append(bbox2d.tolist())
format_result['dimensions'].append(camera_bbox[3:6])
format_result['location'].append(camera_bbox[:3])
format_result['rotation_y'].append(camera_bbox[6].item())
format_result['score'].append(score.item())
if len(format_result['dimensions']) > 0:
format_result['dimensions'] = torch.stack(
format_result['dimensions'])
format_result['location'] = torch.stack(
format_result['location'])
dimensions.append(format_result['dimensions'])
locations.append(format_result['location'])
rotation_y.append(format_result['rotation_y'])
class_labels.append(format_result['class'])
class_scores.append(format_result['score'])
box2d.append(format_result['bbox'])
ids.append(format_result['idx'])
return dimensions, locations, rotation_y, box2d, class_labels, class_scores, ids
"""
dataset related classes and methods
"""
# pylint: disable=unused-argument,missing-docstring
import logging
import sys
import time
import numpy as np
import torch
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("dataset")
class Dataset:
def __init__(self):
self.items_inmemory = {}
def preprocess(self, use_cache=True):
raise NotImplementedError("Dataset:preprocess")
def get_item_count(self):
raise NotImplementedError("Dataset:get_item_count")
def get_list(self):
raise NotImplementedError("Dataset:get_list")
def load_query_samples(self, sample_list):
raise NotImplementedError("Dataset:load_query_samples")
def unload_query_samples(self, sample_list):
raise NotImplementedError("Dataset:unload_query_samples")
def get_samples(self, id_list):
raise NotImplementedError("Dataset:get_samples")
def get_item(self, id):
raise NotImplementedError("Dataset:get_item")
def preprocess(list_data):
batched_pts_list, batched_gt_bboxes_list = [], []
batched_labels_list, batched_names_list = [], []
batched_difficulty_list = []
batched_img_list, batched_calib_list = [], []
batched_images = []
for data_dict in list_data:
pts, gt_bboxes_3d = data_dict['pts'], data_dict['gt_bboxes_3d']
gt_labels, gt_names = data_dict['gt_labels'], data_dict['gt_names']
difficulty = data_dict['difficulty']
image_info, calib_info = data_dict['image_info'], data_dict['calib_info']
batched_pts_list.append(torch.from_numpy(pts))
batched_gt_bboxes_list.append(torch.from_numpy(gt_bboxes_3d))
batched_labels_list.append(torch.from_numpy(gt_labels))
batched_names_list.append(gt_names) # List(str)
batched_difficulty_list.append(torch.from_numpy(difficulty))
batched_img_list.append(image_info)
batched_calib_list.append(calib_info)
batched_images.append(data_dict['images'])
rt_data_dict = dict(
batched_pts=batched_pts_list,
batched_gt_bboxes=batched_gt_bboxes_list,
batched_labels=batched_labels_list,
batched_names=batched_names_list,
batched_difficulty=batched_difficulty_list,
batched_img_info=batched_img_list,
batched_calib_info=batched_calib_list,
batched_images=batched_images
)
return rt_data_dict
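
# Collation sketch (illustrative only): preprocess() simply converts each numpy
# field to a tensor and gathers per-sample lists; real samples come from
# waymo.Waymo and also carry images and calibration for the five cameras. The
# field values and shapes below are hypothetical placeholders.
def _preprocess_example():
    sample = {
        'pts': np.random.rand(100, 11).astype(np.float32),  # painted points: 5 lidar values + 6 class scores
        'gt_bboxes_3d': np.zeros((2, 7), dtype=np.float32),
        'gt_labels': np.array([0, 2]),
        'gt_names': ['Pedestrian', 'Vehicle'],
        'difficulty': np.array([0, 0]),
        'image_info': {'image_idx': '000000'},
        'calib_info': {},
        'images': [],
    }
    batch = preprocess([sample])
    # each entry is a per-sample list, e.g. batch['batched_pts'][0] is a tensor
    assert isinstance(batch['batched_pts'][0], torch.Tensor)
    return batch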
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:23.08-py3
FROM ${FROM_IMAGE_NAME}
ENV DEBIAN_FRONTEND=noninteractive
# apt dependencies
RUN apt-get update
RUN apt-get install -y ffmpeg libsm6 libxext6
# install LDM
COPY . /diffusion
RUN cd /diffusion && \
pip install --no-cache-dir -r requirements.txt
# install loadgen
RUN cd /tmp && \
git clone --recursive https://github.com/mlcommons/inference && \
cd inference/loadgen && \
pip install pybind11 && \
CFLAGS="-std=c++14" python setup.py install && \
rm -rf mlperf
ARG FROM_IMAGE_NAME=pytorch/pytorch:2.2.2-cuda11.8-cudnn8-devel
FROM ${FROM_IMAGE_NAME}
ENV DEBIAN_FRONTEND=noninteractive
# apt dependencies
RUN apt-get update
RUN apt-get install -y ffmpeg libsm6 libxext6 git
# install LDM
COPY . /diffusion
RUN cd /diffusion && \
pip install --no-cache-dir -r requirements.txt
# install loadgen
RUN cd /tmp && \
git clone --recursive https://github.com/mlcommons/inference && \
cd inference/loadgen && \
pip install pybind11 && \
CFLAGS="-std=c++14" python setup.py install && \
rm -rf mlperf
RUN pip install tqdm==4.65.0
RUN pip install numba==0.60.0
RUN pip install opencv-python==4.11.0.86
RUN pip install open3d==0.19.0
RUN pip install scikit-image==0.25.0
RUN pip install ninja==1.11.1
RUN pip install shapely==2.0.6
RUN pip install tensorboard==2.18.0
RUN pip install onnxruntime==1.20.1
"""
mlperf inference benchmarking tool
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import array
import collections
import json
import logging
import os
import sys
import threading
import time
from queue import Queue
import mlperf_loadgen as lg
import numpy as np
import torch
import dataset
import waymo
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("main")
NANO_SEC = 1e9
MILLI_SEC = 1000
SUPPORTED_DATASETS = {
"waymo": (
waymo.Waymo,
dataset.preprocess,
waymo.PostProcessWaymo(),
{} # "image_size": [3, 1024, 1024]},
)
}
SUPPORTED_PROFILES = {
"defaults": {
"dataset": "waymo",
"backend": "pytorch",
"model-name": "pointpainting",
},
}
SCENARIO_MAP = {
"SingleStream": lg.TestScenario.SingleStream,
"MultiStream": lg.TestScenario.MultiStream,
"Server": lg.TestScenario.Server,
"Offline": lg.TestScenario.Offline,
}
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--dataset",
choices=SUPPORTED_DATASETS.keys(),
help="dataset")
parser.add_argument(
"--dataset-path",
required=True,
help="path to the dataset")
parser.add_argument(
"--profile", choices=SUPPORTED_PROFILES.keys(), help="standard profiles"
)
parser.add_argument(
"--scenario",
default="SingleStream",
help="mlperf benchmark scenario, one of " +
str(list(SCENARIO_MAP.keys())),
)
parser.add_argument(
"--max-batchsize",
type=int,
default=1,
help="max batch size in a single inference",
)
parser.add_argument("--threads", default=1, type=int, help="threads")
parser.add_argument(
"--accuracy",
action="store_true",
help="enable accuracy pass")
parser.add_argument(
"--find-peak-performance",
action="store_true",
help="enable finding peak performance pass",
)
parser.add_argument("--backend", help="Name of the backend")
parser.add_argument("--model-name", help="Name of the model")
parser.add_argument("--output", default="output", help="test results")
parser.add_argument("--qps", type=int, help="target qps")
parser.add_argument("--lidar-path", help="Path to model weights")
parser.add_argument("--segmentor-path", help="Path to model weights")
parser.add_argument(
"--dtype",
default="fp32",
choices=["fp32", "fp16", "bf16"],
help="dtype of the model",
)
parser.add_argument(
"--device",
default="cuda",
choices=["cuda", "cpu"],
help="device to run the benchmark",
)
# file to use mlperf rules compliant parameters
parser.add_argument(
"--mlperf_conf", default="mlperf.conf", help="mlperf rules config"
)
# file for user LoadGen settings such as target QPS
parser.add_argument(
"--user_conf",
default="user.conf",
help="user config for user LoadGen settings such as target QPS",
)
# file for LoadGen audit settings
parser.add_argument(
"--audit_conf", default="audit.config", help="config for LoadGen audit settings"
)
# below will override mlperf rules compliant settings - don't use for
# official submission
parser.add_argument("--time", type=int, help="time to scan in seconds")
parser.add_argument("--count", type=int, help="dataset items to use")
parser.add_argument("--debug", action="store_true", help="debug")
parser.add_argument(
"--performance-sample-count", type=int, help="performance sample count", default=5000
)
parser.add_argument(
"--max-latency", type=float, help="mlperf max latency in pct tile"
)
parser.add_argument(
"--samples-per-query",
default=8,
type=int,
help="mlperf multi-stream samples per query",
)
args = parser.parse_args()
    # don't use defaults in argparse; instead we start from a dict, override it with a
    # profile, and use that as the default unless overridden on the command line
defaults = SUPPORTED_PROFILES["defaults"]
if args.profile:
profile = SUPPORTED_PROFILES[args.profile]
defaults.update(profile)
for k, v in defaults.items():
kc = k.replace("-", "_")
if getattr(args, kc) is None:
setattr(args, kc, v)
if args.scenario not in SCENARIO_MAP:
parser.error("valid scanarios:" + str(list(SCENARIO_MAP.keys())))
return args
def get_backend(backend, **kwargs):
if backend == "pytorch":
from backend_deploy import BackendDeploy
backend = BackendDeploy(**kwargs)
elif backend == 'onnx':
from backend_onnx import BackendOnnx
backend = BackendOnnx(**kwargs)
elif backend == "debug":
from backend_debug import BackendDebug
backend = BackendDebug()
else:
raise ValueError("unknown backend: " + backend)
return backend
class Item:
"""An item that we queue for processing by the thread pool."""
def __init__(self, query_id, content_id, inputs, img=None):
self.query_id = query_id
self.content_id = content_id
self.img = img
self.inputs = inputs
self.start = time.time()
class RunnerBase:
def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128):
self.take_accuracy = False
self.ds = ds
self.model = model
self.post_process = post_proc
self.threads = threads
self.max_batchsize = max_batchsize
self.result_timing = []
def handle_tasks(self, tasks_queue):
pass
def start_run(self, result_dict, take_accuracy):
self.result_dict = result_dict
self.result_timing = []
self.take_accuracy = take_accuracy
self.post_process.start()
def run_one_item(self, qitem: Item):
# run the prediction
processed_results = []
try:
results = self.model.predict(qitem.inputs)
processed_results = self.post_process(
results, qitem.content_id, qitem.inputs, self.result_dict)
if self.take_accuracy:
self.post_process.add_results(processed_results)
self.result_timing.append(time.time() - qitem.start)
except Exception as ex: # pylint: disable=broad-except
src = [self.ds.get_item_loc(i) for i in qitem.content_id]
log.error("thread: failed on contentid=%s, %s", src, ex)
# since post_process will not run, fake empty responses
processed_results = [[]] * len(qitem.query_id)
finally:
response_array_refs = []
response = []
for idx, query_id in enumerate(qitem.query_id):
response_array = array.array("B", np.array(
processed_results[idx], np.float32).tobytes())
response_array_refs.append(response_array)
bi = response_array.buffer_info()
response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
lg.QuerySamplesComplete(response)
def enqueue(self, query_samples):
idx = [q.index for q in query_samples]
query_id = [q.id for q in query_samples]
if len(query_samples) < self.max_batchsize:
data, label = self.ds.get_samples(idx)
self.run_one_item(Item(query_id, idx, data, label))
else:
bs = self.max_batchsize
for i in range(0, len(idx), bs):
data, label = self.ds.get_samples(idx[i: i + bs])
self.run_one_item(
Item(query_id[i: i + bs], idx[i: i + bs], data, label)
)
def finish(self):
pass
class QueueRunner(RunnerBase):
def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128):
super().__init__(model, ds, threads, post_proc, max_batchsize)
self.tasks = Queue(maxsize=threads * 4)
self.workers = []
self.result_dict = {}
for _ in range(self.threads):
worker = threading.Thread(
target=self.handle_tasks, args=(
self.tasks,))
worker.daemon = True
self.workers.append(worker)
worker.start()
def handle_tasks(self, tasks_queue):
"""Worker thread."""
while True:
qitem = tasks_queue.get()
if qitem is None:
                # None in the queue indicates the parent wants us to exit
tasks_queue.task_done()
break
self.run_one_item(qitem)
tasks_queue.task_done()
def enqueue(self, query_samples):
idx = [q.index for q in query_samples]
query_id = [q.id for q in query_samples]
if len(query_samples) < self.max_batchsize:
data, label = self.ds.get_samples(idx)
self.tasks.put(Item(query_id, idx, data, label))
else:
bs = self.max_batchsize
for i in range(0, len(idx), bs):
ie = i + bs
data, label = self.ds.get_samples(idx[i:ie])
self.tasks.put(Item(query_id[i:ie], idx[i:ie], data, label))
def finish(self):
# exit all threads
for _ in self.workers:
self.tasks.put(None)
for worker in self.workers:
worker.join()
def main():
args = get_args()
log.info(args)
# find backend
backend = get_backend(
# TODO: pass model, inference and backend arguments
args.backend,
lidar_detector_path=args.lidar_path,
segmentor_path=args.segmentor_path,
data_path=args.dataset_path
)
if args.dtype == "fp16":
dtype = torch.float16
elif args.dtype == "bf16":
dtype = torch.bfloat16
else:
dtype = torch.float32
# --count applies to accuracy mode only and can be used to limit the number of images
# for testing.
count_override = False
count = args.count
if count:
count_override = True
# load model to backend
model = backend.load()
# dataset to use
dataset_class, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset]
ds = dataset_class(
data_root=args.dataset_path,
split='val',
painted=True,
cam_sync=False)
final_results = {
"runtime": model.name(),
"version": model.version(),
"time": int(time.time()),
"args": vars(args),
"cmdline": str(args),
}
mlperf_conf = os.path.abspath(args.mlperf_conf)
if not os.path.exists(mlperf_conf):
log.error("{} not found".format(mlperf_conf))
sys.exit(1)
user_conf = os.path.abspath(args.user_conf)
if not os.path.exists(user_conf):
log.error("{} not found".format(user_conf))
sys.exit(1)
audit_config = os.path.abspath(args.audit_conf)
if args.output:
output_dir = os.path.abspath(args.output)
os.makedirs(output_dir, exist_ok=True)
os.chdir(output_dir)
#
# make one pass over the dataset to validate accuracy
#
count = ds.get_item_count()
# warmup
# TODO: Load warmup samples, the following code is a general
# way of doing this, but might need some fixing
ds.load_query_samples([0])
    for _ in range(5):
        sample = ds.get_samples([0])
        _ = backend.predict(sample[0])
scenario = SCENARIO_MAP[args.scenario]
runner_map = {
lg.TestScenario.SingleStream: RunnerBase,
lg.TestScenario.MultiStream: QueueRunner,
lg.TestScenario.Server: QueueRunner,
lg.TestScenario.Offline: QueueRunner,
}
runner = runner_map[scenario](
model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize
)
def issue_queries(query_samples):
runner.enqueue(query_samples)
def flush_queries():
pass
log_output_settings = lg.LogOutputSettings()
log_output_settings.outdir = output_dir
log_output_settings.copy_summary_to_stdout = False
log_settings = lg.LogSettings()
log_settings.enable_trace = args.debug
log_settings.log_output = log_output_settings
settings = lg.TestSettings()
settings.FromConfig(user_conf, args.model_name, args.scenario)
settings.scenario = scenario
settings.mode = lg.TestMode.PerformanceOnly
if args.accuracy:
settings.mode = lg.TestMode.AccuracyOnly
if args.find_peak_performance:
settings.mode = lg.TestMode.FindPeakPerformance
if args.time:
# override the time we want to run
settings.min_duration_ms = args.time * MILLI_SEC
settings.max_duration_ms = args.time * MILLI_SEC
if args.qps:
qps = float(args.qps)
settings.server_target_qps = qps
settings.offline_expected_qps = qps
if count_override:
settings.min_query_count = count
settings.max_query_count = count
if args.samples_per_query:
settings.multi_stream_samples_per_query = args.samples_per_query
if args.max_latency:
settings.server_target_latency_ns = int(args.max_latency * NANO_SEC)
settings.multi_stream_expected_latency_ns = int(
args.max_latency * NANO_SEC)
performance_sample_count = (
args.performance_sample_count
if args.performance_sample_count
else min(count, 500)
)
sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(
count, performance_sample_count, ds.load_query_samples, ds.unload_query_samples
)
log.info("starting {}".format(scenario))
result_dict = {"scenario": str(scenario)}
runner.start_run(result_dict, args.accuracy)
lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config)
if args.accuracy:
post_proc.finalize(result_dict, ds)
final_results["accuracy_results"] = result_dict
runner.finish()
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)
#
# write final results
#
if args.output:
with open("results.json", "w") as f:
json.dump(final_results, f, sort_keys=True, indent=4)
if __name__ == "__main__":
main()
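
# Example invocation (illustrative; assumes this file is saved as main.py and
# all paths are hypothetical placeholders):
#   python main.py --dataset waymo --dataset-path /data/waymo_processed \
#       --backend pytorch --model-name pointpainting \
#       --segmentor-path /models/deeplabv3plus.pth --lidar-path /models/pointpillars.pth \
#       --scenario Offline --accuracy --output output
# Passing "--backend onnx" selects BackendOnnx instead, in which case the two
# model paths should point at the exported ONNX files.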
from .anchors import Anchors, anchors2bboxes, bboxes2deltas
from .pointpillars import PointPillars
import torch
import math
from tools.process import limit_period, iou2d_nearest
class Anchors():
def __init__(self, ranges, sizes, rotations):
assert len(ranges) == len(sizes)
self.ranges = ranges
self.sizes = sizes
self.rotations = rotations
def get_anchors(self, feature_map_size, anchor_range,
anchor_size, rotations):
'''
feature_map_size: (y_l, x_l)
anchor_range: [x1, y1, z1, x2, y2, z2]
anchor_size: [w, l, h]
rotations: [0, 1.57]
return: shape=(y_l, x_l, 2, 7)
'''
device = feature_map_size.device
x_centers = torch.linspace(
anchor_range[0],
anchor_range[3],
feature_map_size[1] + 1,
device=device)
y_centers = torch.linspace(
anchor_range[1],
anchor_range[4],
feature_map_size[0] + 1,
device=device)
z_centers = torch.linspace(
anchor_range[2],
anchor_range[5],
1 + 1,
device=device)
x_shift = (x_centers[1] - x_centers[0]) / 2
y_shift = (y_centers[1] - y_centers[0]) / 2
z_shift = (z_centers[1] - z_centers[0]) / 2
x_centers = x_centers[:feature_map_size[1]] + \
x_shift # (feature_map_size[1], )
y_centers = y_centers[:feature_map_size[0]] + \
y_shift # (feature_map_size[0], )
z_centers = z_centers[:1] + z_shift # (1, )
# [feature_map_size[1], feature_map_size[0], 1, 2] * 4
meshgrids = torch.meshgrid(x_centers, y_centers, z_centers, rotations)
meshgrids = list(meshgrids)
for i in range(len(meshgrids)):
# [feature_map_size[1], feature_map_size[0], 1, 2, 1]
meshgrids[i] = meshgrids[i][..., None]
anchor_size = anchor_size[None, None, None, None, :]
repeat_shape = [
feature_map_size[1],
feature_map_size[0],
1,
len(rotations),
1]
# [feature_map_size[1], feature_map_size[0], 1, 2, 3]
anchor_size = anchor_size.repeat(repeat_shape)
meshgrids.insert(3, anchor_size)
# [1, feature_map_size[0], feature_map_size[1], 2, 7]
        anchors = torch.cat(meshgrids, dim=-1).permute(2, 1, 0, 3, 4).contiguous()
        return anchors.squeeze(0)
def get_multi_anchors(self, feature_map_size):
'''
feature_map_size: (y_l, x_l)
ranges: [[x1, y1, z1, x2, y2, z2], [x1, y1, z1, x2, y2, z2], [x1, y1, z1, x2, y2, z2]]
sizes: [[w, l, h], [w, l, h], [w, l, h]]
rotations: [0, 1.57]
return: shape=(y_l, x_l, 3, 2, 7)
'''
device = feature_map_size.device
ranges = torch.tensor(self.ranges, device=device)
sizes = torch.tensor(self.sizes, device=device)
rotations = torch.tensor(self.rotations, device=device)
multi_anchors = []
for i in range(len(ranges)):
anchors = self.get_anchors(feature_map_size=feature_map_size,
anchor_range=ranges[i],
anchor_size=sizes[i],
rotations=rotations)
multi_anchors.append(anchors[:, :, None, :, :])
multi_anchors = torch.cat(multi_anchors, dim=2)
return multi_anchors
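
# Shape-check sketch (not used by the benchmark): with the three anchor
# configurations from PointPillars.__init__ and an arbitrary (y_l, x_l) = (4, 8)
# feature map, get_multi_anchors should return a (4, 8, 3, 2, 7) tensor, as
# described in the docstring above.
def _anchor_shape_check():
    gen = Anchors(
        ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
                [-74.88, -74.88, 0, 74.88, 74.88, 0],
                [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188]],
        sizes=[[0.84, 0.91, 1.74], [0.84, 1.81, 1.77], [2.08, 4.73, 1.77]],
        rotations=[0, 1.57])
    anchors = gen.get_multi_anchors(torch.tensor([4, 8]))
    assert anchors.shape == (4, 8, 3, 2, 7)
    return anchors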
def anchors2bboxes(anchors, deltas):
'''
anchors: (M, 7), (x, y, z, w, l, h, theta)
deltas: (M, 7)
return: (M, 7)
'''
da = torch.sqrt(anchors[:, 3] ** 2 + anchors[:, 4] ** 2)
x = deltas[:, 0] * da + anchors[:, 0]
y = deltas[:, 1] * da + anchors[:, 1]
z = deltas[:, 2] * anchors[:, 5] + anchors[:, 2] + anchors[:, 5] / 2
w = anchors[:, 3] * torch.exp(deltas[:, 3])
l = anchors[:, 4] * torch.exp(deltas[:, 4])
h = anchors[:, 5] * torch.exp(deltas[:, 5])
z = z - h / 2
theta = anchors[:, 6] + deltas[:, 6]
bboxes = torch.stack([x, y, z, w, l, h, theta], dim=1)
return bboxes
def bboxes2deltas(bboxes, anchors):
'''
bboxes: (M, 7), (x, y, z, w, l, h, theta)
anchors: (M, 7)
return: (M, 7)
'''
da = torch.sqrt(anchors[:, 3] ** 2 + anchors[:, 4] ** 2)
dx = (bboxes[:, 0] - anchors[:, 0]) / da
dy = (bboxes[:, 1] - anchors[:, 1]) / da
zb = bboxes[:, 2] + bboxes[:, 5] / 2 # bottom center
za = anchors[:, 2] + anchors[:, 5] / 2 # bottom center
dz = (zb - za) / anchors[:, 5] # bottom center
dw = torch.log(bboxes[:, 3] / anchors[:, 3])
dl = torch.log(bboxes[:, 4] / anchors[:, 4])
dh = torch.log(bboxes[:, 5] / anchors[:, 5])
dtheta = bboxes[:, 6] - anchors[:, 6]
deltas = torch.stack([dx, dy, dz, dw, dl, dh, dtheta], dim=1)
return deltas
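
# Consistency sketch (not called anywhere in the benchmark): bboxes2deltas and
# anchors2bboxes are inverse transforms, so encoding random boxes against random
# anchors and decoding again should reproduce the original boxes.
def _delta_roundtrip_check(n=8):
    torch.manual_seed(0)
    anchors = torch.rand(n, 7) + 0.5   # keep w, l, h strictly positive
    bboxes = torch.rand(n, 7) + 0.5
    deltas = bboxes2deltas(bboxes, anchors)
    recovered = anchors2bboxes(anchors, deltas)
    assert torch.allclose(recovered, bboxes, atol=1e-5)
    return deltas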
def anchor_target(batched_anchors, batched_gt_bboxes,
batched_gt_labels, assigners, nclasses):
'''
batched_anchors: [(y_l, x_l, 3, 2, 7), (y_l, x_l, 3, 2, 7), ... ]
batched_gt_bboxes: [(n1, 7), (n2, 7), ...]
batched_gt_labels: [(n1, ), (n2, ), ...]
return:
dict = {batched_anchors_labels: (bs, n_anchors),
batched_labels_weights: (bs, n_anchors),
batched_anchors_reg: (bs, n_anchors, 7),
batched_reg_weights: (bs, n_anchors),
batched_anchors_dir: (bs, n_anchors),
batched_dir_weights: (bs, n_anchors)}
'''
assert len(batched_anchors) == len(
batched_gt_bboxes) == len(batched_gt_labels)
batch_size = len(batched_anchors)
n_assigners = len(assigners)
batched_labels, batched_label_weights = [], []
batched_bbox_reg, batched_bbox_reg_weights = [], []
batched_dir_labels, batched_dir_labels_weights = [], []
for i in range(batch_size):
anchors = batched_anchors[i]
gt_bboxes, gt_labels = batched_gt_bboxes[i], batched_gt_labels[i]
        # what do we want to compute next?
        # 1. identify positive and negative anchors -> cls
        # 2. compute the regression targets -> reg
        # 3. identify the direction targets -> dir_cls
multi_labels, multi_label_weights = [], []
multi_bbox_reg, multi_bbox_reg_weights = [], []
multi_dir_labels, multi_dir_labels_weights = [], []
d1, d2, d3, d4, d5 = anchors.size()
for j in range(n_assigners): # multi anchors
assigner = assigners[j]
pos_iou_thr, neg_iou_thr, min_iou_thr = \
assigner['pos_iou_thr'], assigner['neg_iou_thr'], assigner['min_iou_thr']
cur_anchors = anchors[:, :, j, :, :].reshape(-1, 7)
overlaps = iou2d_nearest(gt_bboxes, cur_anchors)
if overlaps.shape[0] == 0:
max_overlaps = torch.zeros_like(
cur_anchors[:, 0], dtype=cur_anchors.dtype)
max_overlaps_idx = torch.zeros_like(
cur_anchors[:, 0], dtype=torch.long)
else:
max_overlaps, max_overlaps_idx = torch.max(overlaps, dim=0)
gt_max_overlaps, _ = torch.max(overlaps, dim=1)
            assigned_gt_inds = -torch.ones_like(cur_anchors[:, 0], dtype=torch.long)
# a. negative anchors
assigned_gt_inds[max_overlaps < neg_iou_thr] = 0
# b. positive anchors
# rule 1
assigned_gt_inds[max_overlaps >=
pos_iou_thr] = max_overlaps_idx[max_overlaps >= pos_iou_thr] + 1
# rule 2
            # a gt bbox may be matched to multiple anchors, but only to the
            # anchors that share its highest iou.
            # rule 2 may modify the labels generated by rule 1
            # (use k here to avoid shadowing the batch index i of the outer loop)
            for k in range(len(gt_bboxes)):
                if gt_max_overlaps[k] >= min_iou_thr:
                    assigned_gt_inds[overlaps[k] == gt_max_overlaps[k]] = k + 1
pos_flag = assigned_gt_inds > 0
neg_flag = assigned_gt_inds == 0
# 1. anchor labels
            # use nclasses (rather than -1) as the background label, since some gt bboxes already carry label -1
assigned_gt_labels = torch.zeros_like(
cur_anchors[:, 0], dtype=torch.long) + nclasses
assigned_gt_labels[pos_flag] = gt_labels[assigned_gt_inds[pos_flag] - 1].long()
assigned_gt_labels_weights = torch.zeros_like(cur_anchors[:, 0])
assigned_gt_labels_weights[pos_flag] = 1
assigned_gt_labels_weights[neg_flag] = 1
# 2. anchor regression
assigned_gt_reg_weights = torch.zeros_like(cur_anchors[:, 0])
assigned_gt_reg_weights[pos_flag] = 1
assigned_gt_reg = torch.zeros_like(cur_anchors)
positive_anchors = cur_anchors[pos_flag]
corr_gt_bboxes = gt_bboxes[assigned_gt_inds[pos_flag] - 1]
assigned_gt_reg[pos_flag] = bboxes2deltas(
corr_gt_bboxes, positive_anchors)
# 3. anchor direction
assigned_gt_dir_weights = torch.zeros_like(cur_anchors[:, 0])
assigned_gt_dir_weights[pos_flag] = 1
assigned_gt_dir = torch.zeros_like(
cur_anchors[:, 0], dtype=torch.long)
dir_cls_targets = limit_period(
corr_gt_bboxes[:, 6].cpu(), 0, 2 * math.pi).to(corr_gt_bboxes)
dir_cls_targets = torch.floor(dir_cls_targets / math.pi).long()
assigned_gt_dir[pos_flag] = torch.clamp(
dir_cls_targets, min=0, max=1)
multi_labels.append(assigned_gt_labels.reshape(d1, d2, 1, d4))
multi_label_weights.append(
assigned_gt_labels_weights.reshape(
d1, d2, 1, d4))
multi_bbox_reg.append(assigned_gt_reg.reshape(d1, d2, 1, d4, -1))
multi_bbox_reg_weights.append(
assigned_gt_reg_weights.reshape(
d1, d2, 1, d4))
multi_dir_labels.append(assigned_gt_dir.reshape(d1, d2, 1, d4))
multi_dir_labels_weights.append(
assigned_gt_dir_weights.reshape(
d1, d2, 1, d4))
multi_labels = torch.cat(multi_labels, dim=-2).reshape(-1)
multi_label_weights = torch.cat(
multi_label_weights, dim=-2).reshape(-1)
multi_bbox_reg = torch.cat(multi_bbox_reg, dim=-3).reshape(-1, d5)
multi_bbox_reg_weights = torch.cat(
multi_bbox_reg_weights, dim=-2).reshape(-1)
multi_dir_labels = torch.cat(multi_dir_labels, dim=-2).reshape(-1)
multi_dir_labels_weights = torch.cat(
multi_dir_labels_weights, dim=-2).reshape(-1)
batched_labels.append(multi_labels)
batched_label_weights.append(multi_label_weights)
batched_bbox_reg.append(multi_bbox_reg)
batched_bbox_reg_weights.append(multi_bbox_reg_weights)
batched_dir_labels.append(multi_dir_labels)
batched_dir_labels_weights.append(multi_dir_labels_weights)
rt_dict = dict(
batched_labels=torch.stack(
batched_labels, 0), # (bs, y_l * x_l * 3 * 2)
batched_label_weights=torch.stack(
batched_label_weights, 0), # (bs, y_l * x_l * 3 * 2)
batched_bbox_reg=torch.stack(
batched_bbox_reg, 0), # (bs, y_l * x_l * 3 * 2, 7)
batched_bbox_reg_weights=torch.stack(
batched_bbox_reg_weights, 0), # (bs, y_l * x_l * 3 * 2)
batched_dir_labels=torch.stack(
batched_dir_labels, 0), # (bs, y_l * x_l * 3 * 2)
batched_dir_labels_weights=torch.stack(
batched_dir_labels_weights, 0) # (bs, y_l * x_l * 3 * 2)
)
return rt_dict
import onnxruntime as ort
import argparse
import model.segmentation as network
import os
import numpy as np
import torch
from torchvision import transforms
from PIL import Image
import copy
import sys
from tqdm import tqdm
sys.path.append('..')
def get_calib_from_file(calib_file):
"""Read in a calibration file and parse into a dictionary."""
data = {}
with open(calib_file, 'r') as f:
lines = [line for line in f.readlines() if line.strip()]
for line in lines:
key, value = line.split(':', 1)
# The only non-float values in these files are dates, which
# we don't care about anyway
try:
if key == 'R0_rect':
data['R0'] = torch.tensor([float(x)
for x in value.split()]).reshape(3, 3)
else:
data[key] = torch.tensor([float(x)
for x in value.split()]).reshape(3, 4)
except ValueError:
pass
return data
def to_numpy(tensor):
return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
class Painter:
def __init__(self, args, onnx=False):
self.root_split_path = args.training_path
self.save_path = os.path.join(args.training_path, "painted_lidar/")
self.onnx = onnx
if not os.path.exists(self.save_path):
os.mkdir(self.save_path)
self.seg_net_index = 0
self.model = None
print(f'Using Segmentation Network -- deeplabv3plus')
checkpoint_file = args.model_path
if self.onnx:
model = ort.InferenceSession(checkpoint_file)
self.input_image_name = model.get_inputs()[0].name
else:
model = network.modeling.__dict__['deeplabv3plus_resnet50'](
num_classes=19, output_stride=16)
checkpoint = torch.load(checkpoint_file)
model.load_state_dict(checkpoint["model_state"])
model.eval()
device = torch.device(
'cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
self.model = model
self.cam_sync = args.cam_sync
def get_lidar(self, idx):
lidar_file = os.path.join(
self.root_split_path, 'velodyne/' + ('%s.bin' % idx))
return torch.from_numpy(np.fromfile(
str(lidar_file), dtype=np.float32).reshape(-1, 6))
def get_image(self, idx, camera):
filename = os.path.join(self.root_split_path,
camera + ('%s.jpg' % idx))
input_image = Image.open(filename)
preprocess = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(
mean=[
0.485, 0.456, 0.406], std=[
0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
        # create a mini-batch as expected by the model
        input_batch = input_tensor.unsqueeze(0)
        # move the input to GPU for speed if available
        if torch.cuda.is_available():
            input_batch = input_batch.to('cuda')
        return input_batch
def get_model_output(self, input_batch):
with torch.no_grad():
output = self.model(input_batch)[0]
return output
def get_score(self, model_output):
sf = torch.nn.Softmax(dim=2)
output_permute = model_output.permute(1, 2, 0)
output_permute = sf(output_permute)
output_reassign = torch.zeros(
output_permute.size(0), output_permute.size(1), 6).to(
device=model_output.device)
output_reassign[:, :, 0] = torch.sum(
output_permute[:, :, :11], dim=2) # background
output_reassign[:, :, 1] = output_permute[:, :, 18] # bicycle
output_reassign[:, :, 2] = torch.sum(
output_permute[:, :, [13, 14, 15, 16]], dim=2) # vehicles
output_reassign[:, :, 3] = output_permute[:, :, 11] # person
output_reassign[:, :, 4] = output_permute[:, :, 12] # rider
output_reassign[:, :, 5] = output_permute[:, :, 17] # motorcycle
return output_reassign
def get_calib_fromfile(self, idx, device):
calib_file = os.path.join(
self.root_split_path, 'calib/' + ('%s.txt' % idx))
calib = get_calib_from_file(calib_file)
calib['P0'] = torch.cat([calib['P0'], torch.tensor(
[[0., 0., 0., 1.]])], axis=0).to(device=device)
calib['P1'] = torch.cat([calib['P1'], torch.tensor(
[[0., 0., 0., 1.]])], axis=0).to(device=device)
calib['P2'] = torch.cat([calib['P2'], torch.tensor(
[[0., 0., 0., 1.]])], axis=0).to(device=device)
calib['P3'] = torch.cat([calib['P3'], torch.tensor(
[[0., 0., 0., 1.]])], axis=0).to(device=device)
calib['P4'] = torch.cat([calib['P4'], torch.tensor(
[[0., 0., 0., 1.]])], axis=0).to(device=device)
calib['R0_rect'] = torch.zeros(
[4, 4], dtype=calib['R0'].dtype, device=device)
calib['R0_rect'][3, 3] = 1.
calib['R0_rect'][:3, :3] = calib['R0'].to(device=device)
calib['Tr_velo_to_cam_0'] = torch.cat([calib['Tr_velo_to_cam_0'], torch.tensor(
[[0., 0., 0., 1.]], )], axis=0).to(device=device)
calib['Tr_velo_to_cam_1'] = torch.cat([calib['Tr_velo_to_cam_1'], torch.tensor(
[[0., 0., 0., 1.]], )], axis=0).to(device=device)
calib['Tr_velo_to_cam_2'] = torch.cat([calib['Tr_velo_to_cam_2'], torch.tensor(
[[0., 0., 0., 1.]], )], axis=0).to(device=device)
calib['Tr_velo_to_cam_3'] = torch.cat([calib['Tr_velo_to_cam_3'], torch.tensor(
[[0., 0., 0., 1.]], )], axis=0).to(device=device)
calib['Tr_velo_to_cam_4'] = torch.cat([calib['Tr_velo_to_cam_4'], torch.tensor(
[[0., 0., 0., 1.]], )], axis=0).to(device=device)
return calib
def cam_to_lidar(self, pointcloud, projection_mats, camera_num):
"""
Takes in lidar in velo coords, returns lidar points in camera coords
:param pointcloud: (n_points, 4) np.array (x,y,z,r) in velodyne coordinates
:return lidar_cam_coords: (n_points, 4) np.array (x,y,z,r) in camera coordinates
"""
lidar_velo_coords = copy.deepcopy(pointcloud)
# copy reflectances column
reflectances = copy.deepcopy(lidar_velo_coords[:, -1])
lidar_velo_coords[:, -1] = 1 # for multiplying with homogeneous matrix
lidar_cam_coords = projection_mats['Tr_velo_to_cam_' +
str(camera_num)].matmul(lidar_velo_coords.transpose(0, 1))
lidar_cam_coords = lidar_cam_coords.transpose(0, 1)
lidar_cam_coords[:, -1] = reflectances
return lidar_cam_coords
def project_points_mask(self, lidar_cam_points,
projection_mats, class_scores, camera_num):
points_projected_on_mask = projection_mats['P' + str(camera_num)].matmul(
projection_mats['R0_rect'].matmul(lidar_cam_points.transpose(0, 1)))
points_projected_on_mask = points_projected_on_mask.transpose(0, 1)
points_projected_on_mask = points_projected_on_mask / \
(points_projected_on_mask[:, 2].reshape(-1, 1))
true_where_x_on_img = (0 < points_projected_on_mask[:, 0]) & (
points_projected_on_mask[:, 0] < class_scores[camera_num].shape[1]) # x in img coords is cols of img
true_where_y_on_img = (0 < points_projected_on_mask[:, 1]) & (
points_projected_on_mask[:, 1] < class_scores[camera_num].shape[0])
true_where_point_on_img = true_where_x_on_img & true_where_y_on_img & (
lidar_cam_points[:, 2] > 0)
# filter out points that don't project to image
points_projected_on_mask = points_projected_on_mask[true_where_point_on_img]
# using floor so you don't end up indexing num_rows+1th row or col
points_projected_on_mask = torch.floor(points_projected_on_mask).int()
        # drop the homogeneous coordinate from every point, giving an
        # (N_pts, 2) int array
points_projected_on_mask = points_projected_on_mask[:, :2]
return (points_projected_on_mask, true_where_point_on_img)
def augment_lidar_class_scores_both(
self, class_scores, lidar_raw, projection_mats):
"""
        Projects lidar points onto the segmentation maps and appends, to each point, the class scores of the pixel it projects onto.
"""
# lidar_cam_coords = self.cam_to_lidar(lidar_raw, projection_mats)
################################
lidar_cam_coords = self.cam_to_lidar(
lidar_raw[:, :4], projection_mats, 0)
        lidar_cam_coords[:, -1] = 1  # homogeneous coords for projection
points_projected_on_mask_0, true_where_point_on_img_0 = self.project_points_mask(
lidar_cam_coords, projection_mats, class_scores, 0)
lidar_cam_coords = self.cam_to_lidar(
lidar_raw[:, :4], projection_mats, 1)
        lidar_cam_coords[:, -1] = 1  # homogeneous coords for projection
points_projected_on_mask_1, true_where_point_on_img_1 = self.project_points_mask(
lidar_cam_coords, projection_mats, class_scores, 1)
lidar_cam_coords = self.cam_to_lidar(
lidar_raw[:, :4], projection_mats, 2)
lidar_cam_coords[:, -1] = 1
points_projected_on_mask_2, true_where_point_on_img_2 = self.project_points_mask(
lidar_cam_coords, projection_mats, class_scores, 2)
lidar_cam_coords = self.cam_to_lidar(
lidar_raw[:, :4], projection_mats, 3)
lidar_cam_coords[:, -1] = 1
points_projected_on_mask_3, true_where_point_on_img_3 = self.project_points_mask(
lidar_cam_coords, projection_mats, class_scores, 3)
lidar_cam_coords = self.cam_to_lidar(
lidar_raw[:, :4], projection_mats, 4)
lidar_cam_coords[:, -1] = 1
points_projected_on_mask_4, true_where_point_on_img_4 = self.project_points_mask(
lidar_cam_coords, projection_mats, class_scores, 4)
true_where_point_on_both_0_1 = true_where_point_on_img_0 & true_where_point_on_img_1
true_where_point_on_both_0_2 = true_where_point_on_img_0 & true_where_point_on_img_2
true_where_point_on_both_1_3 = true_where_point_on_img_1 & true_where_point_on_img_3
true_where_point_on_both_2_4 = true_where_point_on_img_2 & true_where_point_on_img_4
true_where_point_on_img = true_where_point_on_img_1 | true_where_point_on_img_0 | true_where_point_on_img_2 | true_where_point_on_img_3 | true_where_point_on_img_4
point_scores_0 = class_scores[0][points_projected_on_mask_0[:, 1],
points_projected_on_mask_0[:, 0]].reshape(-1, class_scores[0].shape[2])
point_scores_1 = class_scores[1][points_projected_on_mask_1[:, 1],
points_projected_on_mask_1[:, 0]].reshape(-1, class_scores[1].shape[2])
point_scores_2 = class_scores[2][points_projected_on_mask_2[:, 1],
points_projected_on_mask_2[:, 0]].reshape(-1, class_scores[2].shape[2])
point_scores_3 = class_scores[3][points_projected_on_mask_3[:, 1],
points_projected_on_mask_3[:, 0]].reshape(-1, class_scores[3].shape[2])
point_scores_4 = class_scores[4][points_projected_on_mask_4[:, 1],
points_projected_on_mask_4[:, 0]].reshape(-1, class_scores[4].shape[2])
augmented_lidar = torch.cat((lidar_raw[:, :5], torch.zeros(
(lidar_raw.shape[0], class_scores[1].shape[2])).to(device=lidar_raw.device)), axis=1)
        augmented_lidar[true_where_point_on_img_0, -class_scores[0].shape[2]:] += point_scores_0
        augmented_lidar[true_where_point_on_img_1, -class_scores[1].shape[2]:] += point_scores_1
        augmented_lidar[true_where_point_on_img_2, -class_scores[2].shape[2]:] += point_scores_2
        augmented_lidar[true_where_point_on_img_3, -class_scores[3].shape[2]:] += point_scores_3
        augmented_lidar[true_where_point_on_img_4, -class_scores[4].shape[2]:] += point_scores_4
        # points visible in two overlapping cameras received two score contributions
        # above; halve them so the pair of contributions is averaged
        augmented_lidar[true_where_point_on_both_0_1, -class_scores[0].shape[2]:] = \
            0.5 * augmented_lidar[true_where_point_on_both_0_1, -class_scores[0].shape[2]:]
        augmented_lidar[true_where_point_on_both_0_2, -class_scores[0].shape[2]:] = \
            0.5 * augmented_lidar[true_where_point_on_both_0_2, -class_scores[0].shape[2]:]
        augmented_lidar[true_where_point_on_both_1_3, -class_scores[1].shape[2]:] = \
            0.5 * augmented_lidar[true_where_point_on_both_1_3, -class_scores[1].shape[2]:]
        augmented_lidar[true_where_point_on_both_2_4, -class_scores[2].shape[2]:] = \
            0.5 * augmented_lidar[true_where_point_on_both_2_4, -class_scores[2].shape[2]:]
if self.cam_sync:
augmented_lidar = augmented_lidar[true_where_point_on_img]
return augmented_lidar
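
# Note on the painted output: augment_lidar_class_scores_both returns an (N, 11)
# tensor, where the first five columns are the raw lidar features kept from
# lidar_raw[:, :5] and the last six are the per-point class scores produced by
# get_score (background, bicycle, vehicle, person, rider, motorcycle). Points
# visible in two overlapping cameras have their accumulated scores halved so the
# two camera contributions are averaged, and with cam_sync enabled only points
# that project into at least one camera are kept.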
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.anchors import Anchors, anchor_target, anchors2bboxes
from ops import Voxelization
import open3d.ml.torch as ml3d
from tools.process import limit_period
import math
class PillarLayer(nn.Module):
def __init__(self, voxel_size, point_cloud_range,
max_num_points, max_voxels):
super().__init__()
self.voxel_layer = Voxelization(voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
max_num_points=max_num_points,
max_voxels=max_voxels)
@torch.no_grad()
def forward(self, batched_pts):
'''
batched_pts: list[tensor], len(batched_pts) = bs
return:
pillars: (p1 + p2 + ... + pb, num_points, c),
coors_batch: (p1 + p2 + ... + pb, 1 + 3),
num_points_per_pillar: (p1 + p2 + ... + pb, ), (b: batch size)
'''
pillars, coors, npoints_per_pillar = [], [], []
for i, pts in enumerate(batched_pts):
voxels_out, coors_out, num_points_per_voxel_out = self.voxel_layer(
pts)
# voxels_out: (max_voxel, num_points, c), coors_out: (max_voxel, 3)
# num_points_per_voxel_out: (max_voxel, )
pillars.append(voxels_out)
coors.append(coors_out.long())
npoints_per_pillar.append(num_points_per_voxel_out)
# (p1 + p2 + ... + pb, num_points, c)
pillars = torch.cat(pillars, dim=0)
npoints_per_pillar = torch.cat(
npoints_per_pillar,
dim=0) # (p1 + p2 + ... + pb, )
coors_batch = []
for i, cur_coors in enumerate(coors):
coors_batch.append(F.pad(cur_coors, (1, 0), value=i))
# (p1 + p2 + ... + pb, 1 + 3)
coors_batch = torch.cat(coors_batch, dim=0)
return pillars, coors_batch, npoints_per_pillar
class PillarEncoder(nn.Module):
def __init__(self, voxel_size, point_cloud_range, in_channel, out_channel):
super().__init__()
self.out_channel = out_channel
self.vx, self.vy = voxel_size[0], voxel_size[1]
self.x_offset = voxel_size[0] / 2 + point_cloud_range[0]
self.y_offset = voxel_size[1] / 2 + point_cloud_range[1]
self.x_l = math.ceil(
(point_cloud_range[3] -
point_cloud_range[0]) /
voxel_size[0])
self.y_l = math.ceil(
(point_cloud_range[4] -
point_cloud_range[1]) /
voxel_size[1])
self.conv = nn.Conv1d(in_channel, out_channel, 1, bias=False)
self.bn = nn.BatchNorm1d(out_channel, eps=1e-3, momentum=0.01)
def forward(self, pillars, coors_batch, npoints_per_pillar):
'''
pillars: (p1 + p2 + ... + pb, num_points, c), c = 4
coors_batch: (p1 + p2 + ... + pb, 1 + 3)
npoints_per_pillar: (p1 + p2 + ... + pb, )
return: (bs, out_channel, y_l, x_l)
'''
device = pillars.device
# 1. calculate offset to the points center (in each pillar)
        # (p1 + p2 + ... + pb, num_points, 3)
        offset_pt_center = pillars[:, :, :3] - torch.sum(
            pillars[:, :, :3], dim=1, keepdim=True) / npoints_per_pillar[:, None, None]
# 2. calculate offset to the pillar center
# (p1 + p2 + ... + pb, num_points, 1)
x_offset_pi_center = pillars[:, :, :1] - \
(coors_batch[:, None, 1:2] * self.vx + self.x_offset)
# (p1 + p2 + ... + pb, num_points, 1)
y_offset_pi_center = pillars[:, :, 1:2] - \
(coors_batch[:, None, 2:3] * self.vy + self.y_offset)
# 3. encoder
features = torch.cat([pillars,
offset_pt_center,
x_offset_pi_center,
y_offset_pi_center],
dim=-1) # (p1 + p2 + ... + pb, num_points, 9)
features[:, :, 0:1] = x_offset_pi_center # tmp
features[:, :, 1:2] = y_offset_pi_center # tmp
        # Consistent with mmdet3d; the reasoning is discussed in
# https://github.com/open-mmlab/mmdetection3d/issues/1150
# 4. find mask for (0, 0, 0) and update the encoded features
# a very beautiful implementation
voxel_ids = torch.arange(
0, pillars.size(1)).to(device) # (num_points, )
# (num_points, p1 + p2 + ... + pb)
mask = voxel_ids[:, None] < npoints_per_pillar[None, :]
# (p1 + p2 + ... + pb, num_points)
mask = mask.permute(1, 0).contiguous()
features *= mask[:, :, None]
# 5. embedding
# (p1 + p2 + ... + pb, 9, num_points)
features = features.permute(0, 2, 1).contiguous()
# (p1 + p2 + ... + pb, out_channels, num_points)
features = F.relu(self.bn(self.conv(features)))
# (p1 + p2 + ... + pb, out_channels)
pooling_features = torch.max(features, dim=-1)[0]
# 6. pillar scatter
batched_canvas = []
bs = coors_batch[-1, 0] + 1
for i in range(bs):
cur_coors_idx = coors_batch[:, 0] == i
cur_coors = coors_batch[cur_coors_idx, :]
cur_features = pooling_features[cur_coors_idx]
canvas = torch.zeros(
(self.x_l,
self.y_l,
self.out_channel),
dtype=torch.float32,
device=device)
canvas[cur_coors[:, 1], cur_coors[:, 2]] = cur_features
canvas = canvas.permute(2, 1, 0).contiguous()
batched_canvas.append(canvas)
# (bs, in_channel, self.y_l, self.x_l)
batched_canvas = torch.stack(batched_canvas, dim=0)
return batched_canvas
class Backbone(nn.Module):
def __init__(self, in_channel, out_channels,
layer_nums, layer_strides=[2, 2, 2]):
super().__init__()
assert len(out_channels) == len(layer_nums)
assert len(out_channels) == len(layer_strides)
self.multi_blocks = nn.ModuleList()
for i in range(len(layer_strides)):
blocks = []
blocks.append(
nn.Conv2d(
in_channel,
out_channels[i],
3,
stride=layer_strides[i],
bias=False,
padding=1))
blocks.append(
nn.BatchNorm2d(
out_channels[i],
eps=1e-3,
momentum=0.01))
blocks.append(nn.ReLU(inplace=True))
for _ in range(layer_nums[i]):
blocks.append(
nn.Conv2d(
out_channels[i],
out_channels[i],
3,
bias=False,
padding=1))
blocks.append(
nn.BatchNorm2d(
out_channels[i],
eps=1e-3,
momentum=0.01))
blocks.append(nn.ReLU(inplace=True))
in_channel = out_channels[i]
self.multi_blocks.append(nn.Sequential(*blocks))
        # consistent with mmdet3d
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu')
def forward(self, x):
'''
x: (b, c, y_l, x_l). Default: (6, 64, 496, 432)
return: list[]. Default: [(6, 64, 248, 216), (6, 128, 124, 108), (6, 256, 62, 54)]
'''
outs = []
for i in range(len(self.multi_blocks)):
x = self.multi_blocks[i](x)
outs.append(x)
return outs
class Neck(nn.Module):
def __init__(self, in_channels, upsample_strides, out_channels):
super().__init__()
assert len(in_channels) == len(upsample_strides)
assert len(upsample_strides) == len(out_channels)
self.decoder_blocks = nn.ModuleList()
for i in range(len(in_channels)):
decoder_block = []
decoder_block.append(nn.ConvTranspose2d(in_channels[i],
out_channels[i],
upsample_strides[i],
stride=upsample_strides[i],
bias=False))
decoder_block.append(
nn.BatchNorm2d(
out_channels[i],
eps=1e-3,
momentum=0.01))
decoder_block.append(nn.ReLU(inplace=True))
self.decoder_blocks.append(nn.Sequential(*decoder_block))
        # consistent with mmdet3d
for m in self.modules():
if isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal_(
m.weight, mode='fan_out', nonlinearity='relu')
def forward(self, x):
'''
x: [(bs, 64, 248, 216), (bs, 128, 124, 108), (bs, 256, 62, 54)]
return: (bs, 384, 248, 216)
'''
outs = []
for i in range(len(self.decoder_blocks)):
xi = self.decoder_blocks[i](x[i]) # (bs, 128, 248, 216)
outs.append(xi)
out = torch.cat(outs, dim=1)
return out
class Head(nn.Module):
def __init__(self, in_channel, n_anchors, n_classes):
super().__init__()
self.conv_cls = nn.Conv2d(in_channel, n_anchors * n_classes, 1)
self.conv_reg = nn.Conv2d(in_channel, n_anchors * 7, 1)
self.conv_dir_cls = nn.Conv2d(in_channel, n_anchors * 2, 1)
        # consistent with mmdet3d
conv_layer_id = 0
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, mean=0, std=0.01)
if conv_layer_id == 0:
prior_prob = 0.01
bias_init = float(-math.log((1 - prior_prob) / prior_prob))
nn.init.constant_(m.bias, bias_init)
else:
nn.init.constant_(m.bias, 0)
conv_layer_id += 1
def forward(self, x):
'''
x: (bs, 384, 248, 216)
return:
bbox_cls_pred: (bs, n_anchors*3, 248, 216)
bbox_pred: (bs, n_anchors*7, 248, 216)
bbox_dir_cls_pred: (bs, n_anchors*2, 248, 216)
'''
bbox_cls_pred = self.conv_cls(x)
bbox_pred = self.conv_reg(x)
bbox_dir_cls_pred = self.conv_dir_cls(x)
return bbox_cls_pred, bbox_pred, bbox_dir_cls_pred
class PointPillars(nn.Module):
def __init__(self,
nclasses=3,
voxel_size=[0.32, 0.32, 6],
point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
max_num_points=20,
max_voxels=(32000, 32000),
painted=False):
super().__init__()
self.nclasses = nclasses
self.pillar_layer = PillarLayer(voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
max_num_points=max_num_points,
max_voxels=max_voxels)
if painted:
pillar_channel = 16
else:
pillar_channel = 10
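        # pillar_channel is the per-point feature width fed to PillarEncoder:
        # painted points carry 11 features (5 lidar values + 6 class scores) and
        # the encoder appends 5 more (3 offsets to the pillar mean, 2 offsets to
        # the pillar center), giving 16; plain 5-feature points give 10.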
self.pillar_encoder = PillarEncoder(voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
in_channel=pillar_channel,
out_channel=64)
self.backbone = Backbone(in_channel=64,
out_channels=[64, 128, 256],
layer_nums=[3, 5, 5],
layer_strides=[1, 2, 2])
self.neck = Neck(in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128])
self.head = Head(
in_channel=384,
n_anchors=2 * nclasses,
n_classes=nclasses)
# anchors
ranges = [[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
[-74.88, -74.88, 0, 74.88, 74.88, 0],
[-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188]]
sizes = [[0.84, .91, 1.74], [.84, 1.81, 1.77], [2.08, 4.73, 1.77]]
rotations = [0, 1.57]
self.anchors_generator = Anchors(ranges=ranges,
sizes=sizes,
rotations=rotations)
# train
self.assigners = [
{'pos_iou_thr': 0.5, 'neg_iou_thr': 0.3, 'min_iou_thr': 0.3},
{'pos_iou_thr': 0.5, 'neg_iou_thr': 0.3, 'min_iou_thr': 0.3},
{'pos_iou_thr': 0.55, 'neg_iou_thr': 0.4, 'min_iou_thr': 0.4},
]
# val and test
self.nms_pre = 4096
self.nms_thr = 0.25
self.score_thr = 0.1
self.max_num = 500
def get_predicted_bboxes_single(
self, bbox_cls_pred, bbox_pred, bbox_dir_cls_pred, anchors):
'''
bbox_cls_pred: (n_anchors*3, 248, 216)
bbox_pred: (n_anchors*7, 248, 216)
bbox_dir_cls_pred: (n_anchors*2, 248, 216)
anchors: (y_l, x_l, 3, 2, 7)
return:
bboxes: (k, 7)
labels: (k, )
scores: (k, )
'''
# 0. pre-process
bbox_cls_pred = bbox_cls_pred.permute(
1, 2, 0).reshape(-1, self.nclasses)
bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 7)
bbox_dir_cls_pred = bbox_dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
anchors = anchors.reshape(-1, 7)
bbox_cls_pred = torch.sigmoid(bbox_cls_pred)
bbox_dir_cls_pred = torch.max(bbox_dir_cls_pred, dim=1)[1]
# 1. obtain self.nms_pre bboxes based on scores
inds = bbox_cls_pred.max(1)[0].topk(self.nms_pre)[1]
bbox_cls_pred = bbox_cls_pred[inds]
bbox_pred = bbox_pred[inds]
bbox_dir_cls_pred = bbox_dir_cls_pred[inds]
anchors = anchors[inds]
# 2. decode predicted offsets to bboxes
bbox_pred = anchors2bboxes(anchors, bbox_pred)
# 3. nms
bbox_pred2d_xy = bbox_pred[:, [0, 1]]
bbox_pred2d_lw = bbox_pred[:, [3, 4]]
bbox_pred2d = torch.cat([bbox_pred2d_xy - bbox_pred2d_lw / 2,
bbox_pred2d_xy + bbox_pred2d_lw / 2,
bbox_pred[:, 6:]], dim=-1) # (n_anchors, 5)
ret_bboxes, ret_labels, ret_scores = [], [], []
for i in range(self.nclasses):
# 3.1 filter bboxes with scores below self.score_thr
cur_bbox_cls_pred = bbox_cls_pred[:, i]
score_inds = cur_bbox_cls_pred > self.score_thr
if score_inds.sum() == 0:
continue
cur_bbox_cls_pred = cur_bbox_cls_pred[score_inds]
cur_bbox_pred2d = bbox_pred2d[score_inds]
cur_bbox_pred = bbox_pred[score_inds]
cur_bbox_dir_cls_pred = bbox_dir_cls_pred[score_inds]
# 3.2 nms core
keep_inds = ml3d.ops.nms(
cur_bbox_pred2d.cpu(),
cur_bbox_cls_pred.cpu(),
self.nms_thr)
cur_bbox_cls_pred = cur_bbox_cls_pred[keep_inds]
cur_bbox_pred = cur_bbox_pred[keep_inds]
cur_bbox_dir_cls_pred = cur_bbox_dir_cls_pred[keep_inds]
            cur_bbox_pred[:, -1] = limit_period(
                cur_bbox_pred[:, -1].detach().cpu(), 1, math.pi).to(cur_bbox_pred)  # [-pi, 0]
cur_bbox_pred[:, -1] += (1 - cur_bbox_dir_cls_pred) * math.pi
ret_bboxes.append(cur_bbox_pred)
ret_labels.append(torch.zeros_like(
cur_bbox_pred[:, 0], dtype=torch.long) + i)
ret_scores.append(cur_bbox_cls_pred)
# 4. filter some bboxes if bboxes number is above self.max_num
if len(ret_bboxes) == 0:
return {
'lidar_bboxes': torch.empty((0, 7)).detach().cpu(),
'labels': torch.empty(0).detach().cpu(),
'scores': torch.empty(0).detach().cpu()
}
ret_bboxes = torch.cat(ret_bboxes, 0)
ret_labels = torch.cat(ret_labels, 0)
ret_scores = torch.cat(ret_scores, 0)
if ret_bboxes.size(0) > self.max_num:
final_inds = ret_scores.topk(self.max_num)[1]
ret_bboxes = ret_bboxes[final_inds]
ret_labels = ret_labels[final_inds]
ret_scores = ret_scores[final_inds]
result = {
'lidar_bboxes': ret_bboxes.detach().cpu(),
'labels': ret_labels.detach().cpu(),
'scores': ret_scores.detach().cpu()
}
return result
def get_predicted_bboxes(
self, bbox_cls_pred, bbox_pred, bbox_dir_cls_pred, batched_anchors):
'''
bbox_cls_pred: (bs, n_anchors*3, 248, 216)
bbox_pred: (bs, n_anchors*7, 248, 216)
bbox_dir_cls_pred: (bs, n_anchors*2, 248, 216)
batched_anchors: (bs, y_l, x_l, 3, 2, 7)
return:
bboxes: [(k1, 7), (k2, 7), ... ]
labels: [(k1, ), (k2, ), ... ]
scores: [(k1, ), (k2, ), ... ]
'''
results = []
bs = bbox_cls_pred.size(0)
for i in range(bs):
result = self.get_predicted_bboxes_single(bbox_cls_pred=bbox_cls_pred[i],
bbox_pred=bbox_pred[i],
bbox_dir_cls_pred=bbox_dir_cls_pred[i],
anchors=batched_anchors[i])
results.append(result)
return results
def forward(self, batched_pts, mode='test',
batched_gt_bboxes=None, batched_gt_labels=None):
batch_size = len(batched_pts)
# batched_pts: list[tensor] -> pillars: (p1 + p2 + ... + pb, num_points, c),
# coors_batch: (p1 + p2 + ... + pb, 1 + 3),
# num_points_per_pillar: (p1 + p2 + ... + pb, ), (b: batch size)
pillars, coors_batch, npoints_per_pillar = self.pillar_layer(
batched_pts)
# pillars: (p1 + p2 + ... + pb, num_points, c), c = 4
# coors_batch: (p1 + p2 + ... + pb, 1 + 3)
# npoints_per_pillar: (p1 + p2 + ... + pb, )
# -> pillar_features: (bs, out_channel, y_l, x_l)
pillar_features = self.pillar_encoder(
pillars, coors_batch, npoints_per_pillar)
# xs: [(bs, 64, 248, 216), (bs, 128, 124, 108), (bs, 256, 62, 54)]
xs = self.backbone(pillar_features)
# x: (bs, 384, 248, 216)
x = self.neck(xs)
# bbox_cls_pred: (bs, n_anchors*3, 248, 216)
# bbox_pred: (bs, n_anchors*7, 248, 216)
# bbox_dir_cls_pred: (bs, n_anchors*2, 248, 216)
bbox_cls_pred, bbox_pred, bbox_dir_cls_pred = self.head(x)
# anchors
device = bbox_cls_pred.device
feature_map_size = torch.tensor(
list(bbox_cls_pred.size()[-2:]), device=device)
anchors = self.anchors_generator.get_multi_anchors(feature_map_size)
batched_anchors = [anchors for _ in range(batch_size)]
if mode == 'train':
anchor_target_dict = anchor_target(batched_anchors=batched_anchors,
batched_gt_bboxes=batched_gt_bboxes,
batched_gt_labels=batched_gt_labels,
assigners=self.assigners,
nclasses=self.nclasses)
return bbox_cls_pred, bbox_pred, bbox_dir_cls_pred, anchor_target_dict
elif mode == 'val':
results = self.get_predicted_bboxes(bbox_cls_pred=bbox_cls_pred,
bbox_pred=bbox_pred,
bbox_dir_cls_pred=bbox_dir_cls_pred,
batched_anchors=batched_anchors)
return results
elif mode == 'test':
results = self.get_predicted_bboxes(bbox_cls_pred=bbox_cls_pred,
bbox_pred=bbox_pred,
bbox_dir_cls_pred=bbox_dir_cls_pred,
batched_anchors=batched_anchors)
return results
else:
            raise ValueError(f"unsupported mode: {mode}")
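
# Smoke-test sketch (illustrative only): runs the model with random weights on
# random painted points. It assumes the compiled Voxelization op from ops/ and
# open3d-ml are available for the device the points live on, exactly as required
# by the imports above.
def _pointpillars_smoke_test():
    model = PointPillars(nclasses=3, painted=True).eval()
    pts = torch.rand(2000, 11)                # painted points: 5 lidar values + 6 class scores
    pts[:, 0:2] = pts[:, 0:2] * 140.0 - 70.0  # spread x, y inside the point-cloud range
    pts[:, 2] = pts[:, 2] * 5.0 - 1.5         # keep z inside [-2, 4]
    with torch.inference_mode():
        results = model(batched_pts=[pts], mode='test')
    # one result dict per sample, each with 'lidar_bboxes', 'labels' and 'scores'
    return results[0]['lidar_bboxes'].shape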