# Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:22.09-py3
################################################################################
# DALI box iou builder image
################################################################################
FROM ${FROM_IMAGE_NAME} AS dali-box-iou-builder
WORKDIR /workspace/csrc
COPY csrc/dali_box_iou ./dali_box_iou
# Build CUDA box iou (written as a DALI extension)
RUN cd dali_box_iou && \
mkdir build && \
cd build && \
cmake .. && \
make
################################################################################
################################################################################
# DALI proposal matcher builder image
################################################################################
FROM ${FROM_IMAGE_NAME} AS dali-proposal-matcher-builder
WORKDIR /workspace/csrc
COPY csrc/dali_proposal_matcher ./dali_proposal_matcher
# Build CUDA proposal matcher (written as a DALI extension)
RUN cd dali_proposal_matcher && \
mkdir build && \
cd build && \
cmake .. && \
make
################################################################################
################################################################################
# RetinaNet
################################################################################
FROM ${FROM_IMAGE_NAME}
# Build args
ARG MAX_JOBS=4
ARG TORCH_CUDNN_V8_API_ENABLED=1
# Set env vars
ENV MAX_JOBS=${MAX_JOBS}
ENV TORCH_CUDNN_V8_API_ENABLED=${TORCH_CUDNN_V8_API_ENABLED}
# Install dependencies for system configuration logger
RUN apt-get update && \
apt-get install -y --no-install-recommends infiniband-diags pciutils && \
rm -rf /var/lib/apt/lists/*
# Set working directory
WORKDIR /workspace/ssd
# Copy code
COPY . .
# Remove the container's pycocotools in favor of the nvidia optimized version (installed from requirements.txt)
RUN pip uninstall -y pycocotools
# Install python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy DALI box iou and proposal matcher
COPY --from=dali-box-iou-builder /workspace/csrc/dali_box_iou/build/lib_box_iou.so /usr/local/lib/lib_box_iou.so
COPY --from=dali-proposal-matcher-builder /workspace/csrc/dali_proposal_matcher/build/lib_proposal_matcher.so /usr/local/lib/lib_proposal_matcher.so
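# Note: at runtime these shared objects are presumably loaded as custom DALI plugins,
# e.g. via nvidia.dali.plugin_manager.load_library("/usr/local/lib/lib_box_iou.so")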
################################################################################
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
RetinaNet in Torchvision
This folder includes source code from:
* https://github.com/pytorch/vision, licensed under the BSD 3-Clause License.
# RetinaNet
## Model Description
RetinaNet is an object detection model built on a Feature Pyramid Network (FPN) and the Focal Loss function, proposed by the Facebook AI Research team in 2017. It was designed to close the accuracy gap between one-stage detectors and two-stage detectors (such as Faster R-CNN), which is largely caused by the extreme foreground-background class imbalance encountered during training.
Compared with earlier detection algorithms, RetinaNet offers both high detection accuracy and fast inference. It performs particularly well on small objects and has been used successfully in a wide range of vision tasks, such as generic object detection and pedestrian detection.
## Model Architecture
The RetinaNet network consists of two main parts: a feature-extraction backbone and detection heads.
1. Feature extraction: RetinaNet uses a ResNet backbone. Feature maps taken from different depths of the ResNet are combined into a feature pyramid whose levels represent objects at different scales and are used to detect targets of different sizes.
2. Detection heads: each pyramid level feeds two parallel branches, one predicting class confidences and one predicting bounding-box offsets. Each branch is a small stack of convolutional layers producing a fixed number of predictions per anchor. The classification branch is trained with the Focal Loss to handle the extreme imbalance between positive and negative samples, while the regression branch is trained with a Smooth L1 loss on the box coordinates.
During inference, RetinaNet produces detections at every pyramid level and then applies non-maximum suppression (NMS) to remove overlapping boxes, keeping only the highest-confidence detections.
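For reference, a minimal sigmoid focal-loss sketch is shown below with the usual `alpha=0.25`, `gamma=2.0` defaults; it is for illustration only, not this repository's exact implementation:
```python
# Minimal focal-loss sketch (illustrative; not the benchmark's exact code).
import torch
import torch.nn.functional as F

def focal_loss(logits: torch.Tensor, targets: torch.Tensor,
               alpha: float = 0.25, gamma: float = 2.0) -> torch.Tensor:
    # logits and targets have the same shape; targets are 0/1 anchor labels.
    p = torch.sigmoid(logits)
    ce = F.binary_cross_entropy_with_logits(logits, targets, reduction="none")
    p_t = p * targets + (1 - p) * (1 - targets)              # prob of the true class
    alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
    loss = alpha_t * (1 - p_t) ** gamma * ce                 # down-weights easy examples
    # RetinaNet normalizes the summed loss by the number of foreground anchors.
    return loss.sum()
```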
## Target Accuracy
34.0% mAP
## MLPerf Reference Code Version
Version: v2.1
Original code location: https://github.com/mlcommons/training_results_v2.1/tree/master/NVIDIA/benchmarks/bert/implementations/pytorch
## Dataset
Training uses the Open Images dataset, a large-scale image dataset released by Google in 2016. It contains more than 9 million annotated images, each labeled with bounding boxes and class labels for multiple objects, and supports a variety of computer-vision tasks such as object detection, object recognition, and scene understanding.
### Download dataset
The data comes from [OpenImages-v6](https://storage.googleapis.com/openimages/web/index.html). Download it as follows; the dataset is about 352 GB:
```bash
cd ./scripts
pip3 install fiftyone
./download_openimages_mlperf.sh -d <DATAPATH>
```
### Download the pretrained backbone
The network uses a ResNeXt50_32x4d backbone pretrained on ImageNet, which can be fetched with:
```bash
./download_backbone.sh
```
## Training
### Environment setup
A training docker image can be pulled from the [光源](https://www.sourcefind.cn/#/service-details) platform:
* Training image:
Install the python dependencies:
```bash
pip3 install -r requirement.txt
python3 setup.py install
# Note: this model requires a specific apex version; the corresponding whl package is provided in this directory
pip3 install apex-0.1-cp37-cp37m-linux_x86_64.whl
cd ./cocoapi-0.7.0/PythonAPI; python3 setup.py install
```
### Training
Training command (a single node with 8 accelerator cards is used as an example):
```bash
nohup bash sbatch.sh >& ssd_bs16_epoch6.log &
# Training output is written to ssd_bs16_epoch6.log
# Note: change the DATASET_DIR parameter in dcu.sh to point at your input data location
```
## Performance and Accuracy
Tests use the input data described above on Z100L accelerator cards. Results for a single node with 8 cards:
| Test platform | Accuracy | Throughput (samples/s) |
| :-----------: | :------: | :--------------------: |
| Z100L | 34% mAP | 7.9726 |
## Previous Versions
* https://developer.hpccube.com/codes/modelzoo/mlperf_retinanet
## References
* https://mlcommons.org/en/
* https://github.com/mlcommons
<h1 align="center">Single Shot Detector (SSD)</h1>
- [Summary](#summary)
- [Running the benchmark](#running-the-benchmark)
- [Requirements](#requirements)
- [Building the docker image](#building-the-docker-image)
- [Download dataset](#download-dataset)
- [Download the pretrained backbone](#download-the-pretrained-backbone)
- [Training on NVIDIA DGX-A100 (single node) with SLURM](#training-on-nvidia-dgx-a100-single-node-with-slurm)
- [Training on NVIDIA DGX-A100 (multi node) with SLURM](#training-on-nvidia-dgx-a100-multi-node-with-slurm)
- [Training on NVIDIA DGX-A100 (single node) with docker](#training-on-nvidia-dgx-a100-single-node-with-docker)
- [Hyperparameter settings](#hyperparameter-settings)
- [Dataset/Environment](#datasetenvironment)
- [Publication/Attribution](#publicationattribution)
- [The MLPerf Subset](#the-mlperf-subset)
- [Model](#model)
- [Backbone](#backbone)
- [Weight and bias initialization](#weight-and-bias-initialization)
- [Input augmentations](#input-augmentations)
- [Publication/Attribution](#publicationattribution-1)
- [Quality](#quality)
- [Quality metric](#quality-metric)
- [Quality target](#quality-target)
- [Evaluation frequency](#evaluation-frequency)
- [Evaluation thoroughness](#evaluation-thoroughness)
# Summary
Single Shot MultiBox Detector (SSD) is an object detection network. For an
input image, the network outputs a set of bounding boxes around the detected
objects, along with their classes. For example:
![](https://upload.wikimedia.org/wikipedia/commons/3/38/Detected-with-YOLO--Schreibtisch-mit-Objekten.jpg)
SSD is a one-stage detector: both localization and classification are done in a
single pass of the network. This allows for faster inference than region
proposal network (RPN) based networks, making it better suited to real-time
applications such as automotive systems and to low-power devices like mobile phones. This is
why it is sometimes referred to as a "single shot" detector.
# Running the benchmark
The benchmark is intended to run on NVIDIA GPUs. It is tested on A100, but other GPUs should work too (some optimization flags might not be available on all GPU generations).
## Requirements
The recommended way to run the benchmark is within docker containers. You need to set up your
machine with:
1. [PyTorch 22.09-py3 NGC container](https://ngc.nvidia.com/registry/nvidia-pytorch)
2. [Docker](https://docs.docker.com/engine/install/)
3. [NVIDIA container runtime](https://github.com/NVIDIA/nvidia-docker)
4. Slurm with [Pyxis](https://github.com/NVIDIA/pyxis) and [Enroot](https://github.com/NVIDIA/enroot) (multi-node)
## Building the docker image
Once the above requirements have been met, you can build the benchmark docker image with:
```bash
docker build --pull -t <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch .
docker push <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch
```
## Download dataset
The benchmark uses a subset of [OpenImages-v6](https://storage.googleapis.com/openimages/web/index.html).
To download the subset:
```bash
pip install fiftyone
cd ./public-scripts
./download_openimages_mlperf.sh -d <DATAPATH>
```
The script will download the benchmark subset with metadata and labels, then
convert the labels to [COCO](https://cocodataset.org/#home) format. The
downloaded dataset size is 352GB and the expected folder structure after
running the script is:
```
<DATAPATH>
└───info.json
└───train
│ └─── data
│ │ 000002b66c9c498e.jpg
│ │ 000002b97e5471a0.jpg
│ │ ...
│ └─── metadata
│ │ classes.csv
│ │ hierarchy.json
│ │ image_ids.csv
│ └─── labels
│ detections.csv
│ openimages-mlperf.json
└───validation
└─── data
│ 0001eeaf4aed83f9.jpg
│ 0004886b7d043cfd.jpg
│ ...
└─── metadata
│ classes.csv
│ hierarchy.json
│ image_ids.csv
└─── labels
detections.csv
openimages-mlperf.json
```
Read more about the mlperf subset [here](#the-mlperf-subset).
## Download the pretrained backbone
The benchmark uses a ResNeXt50_32x4d backbone pretrained on ImageNet. The
weights are downloaded from PyTorch hub.
By default, the code will automatically download the weights to
`$TORCH_HOME/hub` (default is `~/.cache/torch/hub`) and save them for later use.
Alternatively, you can manually download the weights with:
```bash
bash ./public-scripts/download_backbone.sh
```
Then use the downloaded file with `--pretrained <PATH TO WEIGHTS>`.
## Training on NVIDIA DGX-A100 (single node) with SLURM
Launch configuration and system-specific hyperparameters for the NVIDIA DGX-A100
single node reference are in the `config_DGXA100_001x08x032.sh` script.
Steps required to launch single node training on NVIDIA DGX-A100:
```bash
source config_DGXA100_001x08x032.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR="<path/to/dir/containing/openimages/dir>" LOGDIR="<path/to/output/dir>" BACKBONE_DIR="<$(pwd) or path/to/pretrained/ckpt>" sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
## Training on NVIDIA DGX-A100 (multi node) with SLURM
Launch configuration and system-specific hyperparameters for the NVIDIA DGX-A100
multi node reference are in the `config_DGXA100_*.sh` scripts.
Steps required to launch multi node training on NVIDIA DGX-A100:
```bash
source <MULT_NODE_CONFIG>
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR="<path/to/dir/containing/openimages/dir>" LOGDIR="<path/to/output/dir>" BACKBONE_DIR="<$(pwd) or path/to/pretrained/ckpt>" sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
## Training on NVIDIA DGX-A100 (single node) with docker
When generating results for the official v2.0 submission with one node, the
benchmark was launched on a cluster managed by a SLURM scheduler. The
instructions in [Training on NVIDIA DGX-A100 (single node) with SLURM](#training-on-nvidia-dgx-a100-single-node-with-slurm) explain how that is done.
However, to make it easier to run this benchmark in a wider set of machine
environments, we provide here an alternate set of launch instructions
that use nvidia-docker. Note that performance or functionality may
vary from the tested SLURM instructions.
Launch configuration and system-specific hyperparameters for the NVIDIA DGX-A100
single node reference are in the `config_DGXA100_001x08x032.sh` script.
To launch single node training on NVIDIA DGX-A100 with docker, start
training with:
```bash
source config_DGXA100_001x08x032.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR="<path/to/dir/containing/openimages/dir>" LOGDIR="<path/to/output/dir>" BACKBONE_DIR="<$(pwd) or path/to/pretrained/ckpt>" ./run_with_docker.sh
```
Alternatively, you can launch an interactive docker session:
```bash
docker run --rm -it \
--gpus=all \
--ipc=host \
-v <DATADIR>:/datasets/open-images-v6-mlperf \
<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch bash
```
Then launch the training command manually with:
```bash
source config_DGXA100_001x08x032.sh
torchrun --standalone --nproc_per_node=${DGXNGPU} --no_python ./run_and_time.sh
```
You can read more about torchrun [here](https://pytorch.org/docs/stable/elastic/run.html).
## Hyperparameter settings
Hyperparameters are recorded in the `config_*.sh` files for each configuration
and in `run_and_time.sh`.
# Dataset/Environment
## Publication/Attribution
[Google Open Images Dataset V6](https://storage.googleapis.com/openimages/web/index.html)
## The MLPerf Subset
The MLPerf subset includes only 264 classes of the total 601 available in the
full dataset:
| Dataset | # classes | # train images | # validation images | Size |
|-------------------|-----------|----------------|---------------------|-------|
| OpenImages Full | 601 | 1,743,042 | 41,620 | 534GB |
| OpenImages MLPerf | 264 | 1,170,301 | 24,781 | 352GB |
These are the lowest level classes (no child classes) in the dataset
[semantic hierarchy tree](https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html)
with at least 1000 samples.
The list of used classes can be viewed
[here](https://github.com/mlcommons/training/blob/master/single_stage_detector/scripts/download_openimages_mlperf.sh).
# Model
This network takes an 800x800 input image from
[OpenImages-v6](https://storage.googleapis.com/openimages/web/index.html)
with 264 categories, and computes a set of bounding boxes and category predictions.
Other detector models use multiple stages, first proposing regions of interest
that might contain objects, then iterating over the regions of interest to try
to categorize each object. SSD does both of these in one stage, leading to
lower-latency and higher-performance inference.
## Backbone
The backbone is based on ResNeXt50_32x4d as described in Section 3 of
[this paper](https://arxiv.org/pdf/1611.05431.pdf). Using the
same notation as Table 1 of the paper the backbone looks like:
| stage | # stacked blocks | shape of a residual block |
| :--------: | :--------------: | :------------------------: |
| conv1 | | 7x7, 64, stride 2 |
| | | 3x3 max pool, stride 2 |
| conv2_x | 3 | 1x1, 128 |
| | | 3x3, 128, groups=32 |
| | | 1x1, 256 |
| conv3_x | 4 | 1x1, 256 |
| | | 3x3, 256, groups=32 |
| | | 1x1, 512 |
| conv4_x | 6 | 1x1, 512 |
| | | 3x3, 512, groups=32 |
| | | 1x1, 1024 |
| conv5_x | 3 | 1x1, 1024 |
| | | 3x3, 1024, groups=32 |
| | | 1x1, 2048 |
Input images are 800x800 RGB. They are fed to a 7x7 stride 2 convolution with
64 output channels, then through a 3x3 stride 2 max-pool layer.
The rest of the backbone is built from "building blocks": each block stacks a
1x1 convolution, a 3x3 grouped convolution, and a 1x1 convolution, with a
"short-cut" residual connection around the block. All convolutions in the backbone are followed by batch-norm
and ReLU.
The backbone is initialized with the pretrained weights from the corresponding
layers of the ResNeXt50_32x4d implementation from the [Torchvision model
zoo](https://download.pytorch.org/models/-7cdf4587.pth), described in
detail [here](https://pytorch.org/hub/pytorch_vision_resnext/). It is
a ResNeXt50_32x4d network trained on 224x224 ImageNet to achieve a Top-1
error rate of 22.38 and a Top-5 error rate of 6.30.
Of the five convolution stages, only the last three are trained.
The weights of the first two stages are frozen
([code](https://github.com/mlcommons/training/blob/master/single_stage_detector/ssd/model/backbone_utils.py#L94-L101)).
In addition, all batch norm layers in the backbone are frozen
([code](https://github.com/mlcommons/training/blob/master/single_stage_detector/ssd/model/backbone_utils.py#L52)).
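As a hedged sketch (not necessarily the code used by this benchmark), a backbone with these properties can be built with torchvision's `resnet_fpn_backbone` helper, which uses `FrozenBatchNorm2d` by default; its signature differs between torchvision versions, so treat the arguments below as illustrative:
```python
# Illustrative: ResNeXt50_32x4d + FPN with frozen BN and only the last
# three stages trainable (conv1 and conv2_x stay frozen).
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone(
    "resnext50_32x4d",    # ImageNet-pretrained ResNeXt50_32x4d
    pretrained=True,
    trainable_layers=3,   # train conv3_x, conv4_x, conv5_x
)
```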
## Weight and bias initialization
1. The ResNeXt50_32x4d backbone is initialized with the pretrained weights
from [Torchvision model zoo](https://download.pytorch.org/models/-7cdf4587.pth).
2. The classification head weights are initialized using normal distribution
with `mean=0` and `std=0.01`. The biases are initialized with zeros, except
for the classification convolution which is initialized with
`constant=-4.59511985013459`
([code](https://github.com/mlcommons/training/blob/master/single_stage_detector/ssd/model/retinanet.py#L85-L90)).
3. The regression head weights are initialized using normal distribution
with `mean=0` and `std=0.01`. The biases are initialized with zeros
([code](https://github.com/mlcommons/training/blob/master/single_stage_detector/ssd/model/retinanet.py#L171-L177)).
4. The FPN network weights are initialized with uniform Kaiming (also known as
He initialization) using `negative slope=1`. The biases are initialized
with zeros
([code](https://github.com/mlcommons/training/blob/master/single_stage_detector/ssd/model/feature_pyramid_network.py#L90-L91)).
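The classification-bias constant `-4.59511985013459` is simply `-log((1 - pi) / pi)` for a prior foreground probability `pi = 0.01`. The sketch below (with hypothetical helper and argument names) illustrates initializations 2 and 3 above:
```python
# Illustrative only: normal(0, 0.01) weights and zero biases for the head
# convs, then a prior-probability bias on the final classification conv.
import math
import torch.nn as nn

def init_retinanet_heads(cls_head: nn.Module, reg_head: nn.Module,
                         cls_logits: nn.Conv2d, prior_prob: float = 0.01) -> None:
    for module in list(cls_head.modules()) + list(reg_head.modules()):
        if isinstance(module, nn.Conv2d):
            nn.init.normal_(module.weight, mean=0.0, std=0.01)
            nn.init.zeros_(module.bias)
    # -log((1 - 0.01) / 0.01) == -4.59511985013459
    nn.init.constant_(cls_logits.bias, -math.log((1.0 - prior_prob) / prior_prob))
```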
## Input augmentations
The input images are assumed to be sRGB with values in range 0.0 through 1.0.
The input pipeline does the following:
1. Random horizontal flip of both the image and its ground-truth bounding boxes
with a probability of 50%.
2. Normalize the colors to a mean of (0.485, 0.456, 0.406) and standard
deviation (0.229, 0.224, 0.225).
3. Resize image to 800x800 using bilinear interpolation.
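A rough torchvision-only approximation of this pipeline is sketched below for illustration; the benchmark's own transforms also flip the ground-truth boxes together with the image, which this sketch does not do:
```python
# Illustrative image-only version of the input pipeline above.
import torchvision.transforms as T

transform = T.Compose([
    T.RandomHorizontalFlip(p=0.5),   # step 1 (image only in this sketch)
    T.ToTensor(),                    # sRGB values scaled to [0.0, 1.0]
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # step 2
    T.Resize((800, 800)),            # step 3, bilinear by default
])
```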
## Publication/Attribution
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg. [SSD: Single Shot MultiBox
Detector](https://arxiv.org/abs/1512.02325). In the _Proceedings of the
European Conference on Computer Vision_, (ECCV-14):21-37, 2016.
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. [Deep Residual Learning for
Image Recognition](https://arxiv.org/abs/1512.03385). In the _Proceedings of
the Conference on Computer Vision and Pattern Recognition_, (CVPR):770-778, 2016.
Jonathan Huang, Vivek Rathod, Chen Sun, Menglong Zhu, Anoop Korattikara,
Alireza Fathi, Ian Fischer, Zbigniew Wojna, Yang Song, Sergio Guadarrama, Kevin
Murphy. [Speed/accuracy trade-offs for modern convolutional object
detectors](https://arxiv.org/abs/1611.10012). In the _Proceedings of the
Conference on Computer Vision and Pattern Recognition_, (CVPR):3296-3305, 2017.
Krasin I., Duerig T., Alldrin N., Ferrari V., Abu-El-Haija S., Kuznetsova A.,
Rom H., Uijlings J., Popov S., Kamali S., Malloci M., Pont-Tuset J., Veit A.,
Belongie S., Gomes V., Gupta A., Sun C., Chechik G., Cai D., Feng Z.,
Narayanan D., Murphy K.
[OpenImages](https://storage.googleapis.com/openimages/web/index.html): A public
dataset for large-scale multi-label and multi-class image classification, 2017.
Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, Kaiming He.
[Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431)
Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He, Piotr Dollár.
[Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
Torchvision pretrained [ResNeXt50_32x4d](https://pytorch.org/vision/0.12/models.html#id25) on ImageNet
Torchvision [RetinaNet](https://pytorch.org/vision/0.12/models.html#id65)
# Quality
## Quality metric
The metric is COCO box mAP (averaged over IoU thresholds 0.5:0.95), computed over the
OpenImages-MLPerf validation subset.
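For illustration, the same metric can be computed with pycocotools; the file paths below are placeholders, and detections are assumed to be dumped in COCO results format:
```python
# Sketch of computing COCO box mAP (IoU 0.50:0.95) with pycocotools.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("validation/labels/openimages-mlperf.json")   # ground truth
coco_dt = coco_gt.loadRes("predictions.json")                # model detections

evaluator = COCOeval(coco_gt, coco_dt, iouType="bbox")
evaluator.evaluate()
evaluator.accumulate()
evaluator.summarize()
map_50_95 = evaluator.stats[0]   # mAP averaged over IoU 0.50:0.95
```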
## Quality target
mAP of 0.34
## Evaluation frequency
Every epoch, starting with the first one.
## Evaluation thoroughness
All the images in the OpenImages-MLPerf validation subset.
## Steps to launch training
### NVIDIA DGX A100 (multi node)
Launch configuration and system-specific hyperparameters for the NVIDIA DGX A100
multi node submission are in the `config_DGXA100_160x08x001.sh` script.
Steps required to launch multi node training on NVIDIA DGX A100
1. Build the docker container and push to a docker registry
```
docker build --pull -t <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch .
docker push <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch
```
2. Launch the training
```
source config_DGXA100_160x08x001.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR=<path/to/data/dir> LOGDIR=<path/to/output/dir> sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
## Steps to launch training
### NVIDIA DGX A100 (multi node)
Launch configuration and system-specific hyperparameters for the NVIDIA DGX A100
multi node submission are in the `config_DGXA100_256x08x001.sh` script.
Steps required to launch multi node training on NVIDIA DGX A100
1. Build the docker container and push to a docker registry
```
docker build --pull -t <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch .
docker push <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch
```
2. Launch the training
```
source config_DGXA100_256x08x001.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR=<path/to/data/dir> LOGDIR=<path/to/output/dir> sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
## Steps to launch training
### NVIDIA DGX A100 (multi node)
Launch configuration and system-specific hyperparameters for the NVIDIA DGX A100
multi node submission are in the `config_DGXA100_008x08x004.sh` script.
Steps required to launch multi node training on NVIDIA DGX A100
1. Build the docker container and push to a docker registry
```
docker build --pull -t <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch .
docker push <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch
```
2. Launch the training
```
source config_DGXA100_008x08x004.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR=<path/to/data/dir> LOGDIR=<path/to/output/dir> sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
## Steps to launch training
### NVIDIA DGX A100 (single node)
Launch configuration and system-specific hyperparameters for the NVIDIA DGX A100
single node submission are in the `config_DGXA100_001x08x032.sh` script.
Steps required to launch single node training on NVIDIA DGX A100
1. Build the docker container and push to a docker registry
```
docker build --pull -t <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch .
docker push <DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch
```
2. Launch the training
```
source config_DGXA100_001x08x032.sh
CONT="<DOCKER_REGISTRY>/mlperf-nvidia:single_stage_detector-pytorch" DATADIR=<path/to/data/dir> LOGDIR=<path/to/output/dir> sbatch -N $DGXNNODES -t $WALLTIME run.sub
```
# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
import multiprocessing as mp # TODO(ahmadki): pytorch mp ?
from concurrent.futures import ProcessPoolExecutor # TODO(ahmadki): pytorch futures ?
# from loky import get_reusable_executor
# from mpi4py.futures import MPIPoolExecutor
# A general class for async executing of functions
class AsyncExecutor(object):
def __init__(self, max_workers=1, mp_context="spawn"):
self.max_workers = max_workers
self.tasks = OrderedDict() # a dict of {tags: futures}
self.mp_context = mp.get_context(mp_context)
# mp.set_start_method(mp_context)
self.pool = ProcessPoolExecutor(max_workers=self.max_workers, mp_context=self.mp_context)
# self.pool = MPIPoolExecutor(max_workers, main=False)
# self.pool = get_reusable_executor(max_workers=max_workers, timeout=None)
def __del__(self):
self.cancel(tag=None)
self.pool.shutdown(wait=False)
####################
# Executor functions
####################
# submit given function and its arguments for async execution
def submit(self, tag, fn, *args, **kwargs):
self.tasks[tag] = self.pool.submit(fn, *args, **kwargs)
    def shutdown(self, wait=True):
        self.pool.shutdown(wait=wait)
#############################
# functions on future objects
#############################
def cancel(self, tag=None):
if tag:
return self.tasks[tag].cancel()
else:
return {tag: self.tasks[tag].cancel() for tag in self.tasks.keys()}
def cancelled(self, tag=None):
if tag:
return self.tasks[tag].cancelled()
else:
return {tag: self.tasks[tag].cancelled() for tag in self.tasks.keys()}
def running(self, tag=None):
if tag:
return self.tasks[tag].running()
else:
return {tag: self.tasks[tag].running() for tag in self.tasks.keys()}
def done(self, tag=None):
if tag:
return self.tasks[tag].done()
else:
return {tag: self.tasks[tag].done() for tag in self.tasks.keys()}
def result(self, tag=None, timeout=None):
if tag:
return self.tasks[tag].result(timeout=timeout)
else:
return {tag: self.tasks[tag].result(timeout=timeout) for tag in self.tasks.keys()}
def exception(self, tag=None, timeout=None):
if tag:
return self.tasks[tag].exception(timeout=timeout)
else:
return {tag: self.tasks[tag].exception(timeout=timeout) for tag in self.tasks.keys()}
def add_done_callback(self, tag=None, fn=None):
if tag:
return self.tasks[tag].add_done_callback(fn=fn)
else:
return {tag: self.tasks[tag].add_done_callback(fn=fn) for tag in self.tasks.keys()}
######################
# Management functions
######################
# return result of a task and deletes it if successful
# if blocking is true, wait timeout for the task to complete
# if timeout is None, wait indefinitely
def dequeue_if_done(self, tag, blocking=False, timeout=None):
if self.done(tag=tag):
result = self.result(tag=tag)
del self.tasks[tag]
elif blocking:
result = self.result(tag=tag, timeout=timeout)
del self.tasks[tag]
else:
result = None
return result
# return the result of last (LIFO) task or first task (FIFO) and delete it if successful
# if blocking is true, wait timeout for the task to complete
# if timeout is None, wait indefinitely
def pop_if_done(self, last=True, blocking=False, timeout=None):
if len(self.tasks)==0:
return None
        tag = next(reversed(self.tasks.keys())) if last else next(iter(self.tasks.keys()))
return {tag: self.dequeue_if_done(tag=tag, blocking=blocking, timeout=timeout)}
# pop the result of all done tasks
def pop_all_done(self):
if len(self.tasks)==0:
return None
done_tasks = {}
tags = list(self.tasks.keys()) # make a copy of tags because we might mutate self.tasks
for tag in tags:
result = self.dequeue_if_done(tag, blocking=False)
if result:
done_tasks[tag] = result
return done_tasks
# return list of tags
def tags(self):
return self.tasks.keys()
async_executor = AsyncExecutor(max_workers=1)
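# Illustrative usage of the module-level executor above (hypothetical tag and
# worker function; the callable must be picklable for the "spawn" context):
#
#   def slow_square(x):
#       return x * x
#
#   async_executor.submit("square-3", slow_square, 3)
#   ...  # do other work while the task runs in a worker process
#   print(async_executor.dequeue_if_done("square-3", blocking=True))  # -> 9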