# compilation and distribution
__pycache__
*.pyc
*.so
ext/build/
ext/torch_extension.egg-info/
dist/
*.egg-info
# pytorch/python/numpy formats
*.pth
*.pkl
*.npy
# ipython/jupyter notebooks
*.ipynb
**/.ipynb_checkpoints/
# Editor temporaries
*.swn
*.swo
*.swp
*~
# Pycharm editor settings
.idea
.DS_Store
# Develop Guide
## Custom Dataset
Adding a custom dataset is simple and flexible.
For example, create `ssd/data/datasets/my_dataset.py`:
```python
import numpy as np
import torch.utils.data

from ssd.structures.container import Container


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, ..., transform=None, target_transform=None):
        # as you would do normally
        ...
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        # load the image as a PIL Image
        image = ...
        # load the bounding boxes in x1, y1, x2, y2 order, shape (N, 4)
        boxes = np.zeros((N, 4), dtype=np.float32)  # placeholder; fill with real boxes
        # and the labels, shape (N,)
        labels = np.zeros((N,), dtype=np.int64)  # placeholder; fill with real labels
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)
        targets = Container(
            boxes=boxes,
            labels=labels,
        )
        # return the image, the targets and the index in your dataset
        return image, targets, index
```
in `ssd/data/datasets/__init__.py`:
```python
from .my_dataset import MyDataset

_DATASETS = {
    'VOCDataset': VOCDataset,
    'COCODataset': COCODataset,
    'MyDataset': MyDataset,
}
```
in `ssd/config/path_catlog.py`:
```python
DATASETS = {
    ...
    'my_custom_dataset': {
        "arg1": "your/arg",
        "arg2": "your/arg",
    },
    ...
}

@staticmethod
def get(name):
    ...
    if name == 'my_custom_dataset':
        attrs = DatasetCatalog.DATASETS[name]
        return dict(factory="MyDataset", args=attrs)
    ...
```
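The `factory`/`args` pair returned by `get` is how the data layer instantiates the dataset: the factory name is looked up in `_DATASETS` and the args are passed through as keyword arguments. A minimal sketch of that lookup (simplified; the project's actual build code may differ):
```python
from ssd.config.path_catlog import DatasetCatalog
from ssd.data.datasets import _DATASETS

data = DatasetCatalog.get('my_custom_dataset')
factory = _DATASETS[data['factory']]  # -> MyDataset
dataset = factory(**data['args'])     # arg1/arg2 become keyword arguments
```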
in your `config.yaml`:
```yaml
DATASETS:
TRAIN: ("my_custom_dataset", )
TEST: ("my_custom_test_dataset", )
```
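For concreteness, here is a minimal end-to-end sketch of such a dataset. The annotation format (a single JSON file mapping image names to boxes and labels) is a made-up example, not something this project prescribes:
```python
import json
import os

import numpy as np
import torch.utils.data
from PIL import Image

from ssd.structures.container import Container


class MyDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, ann_file, transform=None, target_transform=None):
        self.images_dir = images_dir
        # hypothetical format: {"img1.jpg": {"boxes": [[x1, y1, x2, y2], ...], "labels": [...]}}
        with open(ann_file) as f:
            self.annotations = json.load(f)
        self.ids = sorted(self.annotations)
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        name = self.ids[index]
        image = np.array(Image.open(os.path.join(self.images_dir, name)).convert("RGB"))
        ann = self.annotations[name]
        boxes = np.array(ann["boxes"], dtype=np.float32)
        labels = np.array(ann["labels"], dtype=np.int64)
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)
        return image, Container(boxes=boxes, labels=labels), index

    def __len__(self):
        return len(self.ids)
```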
### Test
While the example above should work for training, it is also easy to add your own evaluation code:
in `ssd/data/datasets/evaluation/__init__.py`
```python
if isinstance(dataset, MyDataset):
    return my_own_evaluation(**args)
```
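The built-in evaluations receive the dataset, the accumulated predictions, and an output directory, so `**args` above would unpack to those three. A minimal sketch of a custom evaluation under that assumption (`my_own_evaluation` and the metric are placeholders):
```python
def my_own_evaluation(dataset, predictions, output_dir):
    # assumption: predictions[i] is the detection Container for the i-th image,
    # with 'boxes' in x1, y1, x2, y2 order plus 'labels' and 'scores'
    total = 0
    for i in range(len(dataset)):
        prediction = predictions[i]
        boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores']
        # compare against your ground truth for image i and accumulate your metric here
        total += len(boxes)
    return dict(metrics={'detections_per_image': total / max(len(dataset), 1)})
```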
## Custom Backbone
It is very simple to add your own backbone to SSD.
For example, create `ssd/modeling/backbone/my_backbone.py`:
```python
import torch.nn as nn

from ssd.modeling import registry
from ssd.utils.model_zoo import load_state_dict_from_url


class MyBackbone(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        ...

    def forward(self, x):
        features = []
        # forward through your network, collecting every feature map
        # you want to predict on
        features.append(feature1)
        features.append(feature2)
        features.append(feature3)
        features.append(feature4)
        # return them as a tuple
        return tuple(features)


@registry.BACKBONES.register('my_backbone')
def my_backbone(cfg, pretrained=True):
    model = MyBackbone(cfg)
    model_url = 'your_model_url'
    if pretrained:
        model.init_from_pretrain(load_state_dict_from_url(model_url))
    return model
```
in `ssd/modeling/backbone/__init__.py`:
```python
from .my_backbone import MyBackbone
```
in your `config.yaml`:
```yaml
MODEL:
  BACKBONE:
    NAME: 'my_backbone'
    OUT_CHANNELS: (-, -, -, -)  # should match feature1 - feature4's out_channels in MyBackbone
  PRIORS:
    FEATURE_MAPS: [-, -, -, -]  # feature1 - feature4's spatial sizes
    STRIDES: [-, -, -, -]  # feature1 - feature4's output strides
    MIN_SIZES: [21, 45, 99, 153]  # your custom anchor settings
    MAX_SIZES: [45, 99, 153, 207]
    ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3]]
    BOXES_PER_LOCATION: [6, 6, 6, 6]
```
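`OUT_CHANNELS`, `FEATURE_MAPS`, and `STRIDES` must stay consistent with what the backbone actually produces. A quick sanity check is to run a dummy forward pass and print the shapes; a sketch, assuming the backbone factory in `ssd/modeling/backbone` is exposed as `build_backbone` and using a hypothetical config file name:
```python
import torch

from ssd.config import cfg
from ssd.modeling.backbone import build_backbone  # assumed factory name

cfg.merge_from_file('configs/my_backbone_ssd300.yaml')  # hypothetical config
cfg.freeze()

backbone = build_backbone(cfg)
backbone.eval()
with torch.no_grad():
    # dummy batch at the configured input resolution
    x = torch.randn(1, 3, cfg.INPUT.IMAGE_SIZE, cfg.INPUT.IMAGE_SIZE)
    features = backbone(x)

for i, f in enumerate(features):
    # each feature is (N, C, H, W); C must equal OUT_CHANNELS[i]
    # and H == W == FEATURE_MAPS[i]
    print(i, tuple(f.shape))
```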
MIT License
Copyright (c) 2018 lufficc
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
recursive-include configs *.yaml
# SSD
## Environment Setup
### Docker
Running in Docker is recommended; a prebuilt image is available to pull:
```bash
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310
# use -v to mount your working directory
docker run -it --shm-size 80g --network=host --name=SSD --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.1.0-centos7.6-dtk24.04-py310 /bin/bash
```
Install the dependencies not included in the image:
```bash
cd SSD
pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
```
## Training
### Dataset Setup
#### Pascal VOC
For the Pascal VOC dataset, arrange the folders as follows:
```
VOC_ROOT
|__ VOC2007
|_ JPEGImages
|_ Annotations
|_ ImageSets
|_ SegmentationClass
|__ VOC2012
|_ JPEGImages
|_ Annotations
|_ ImageSets
|_ SegmentationClass
|__ ...
```
In this project, `VOC_ROOT` defaults to the `datasets` folder: either place the dataset there directly, or change the dataset path via `DATA_DIR = 'datasets'` on line 5 of `SSD/ssd/config/path_catlog.py`.
#### COCO
For the COCO dataset, arrange the folders as follows:
```
COCO_ROOT
|__ annotations
|_ instances_valminusminival2014.json
|_ instances_minival2014.json
|_ instances_train2014.json
|_ instances_val2014.json
|_ ...
|__ train2014
|_ <im-1-name>.jpg
|_ ...
|_ <im-N-name>.jpg
|__ val2014
|_ <im-1-name>.jpg
|_ ...
|_ <im-N-name>.jpg
|__ ...
```
In this project, `COCO_ROOT` defaults to the `datasets` folder: either place the dataset there directly, or change the dataset path via `DATA_DIR = 'datasets'` on line 5 of `SSD/ssd/config/path_catlog.py`.
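Before training, a small script can confirm the layout matches what the path catalog expects. A sketch; `datasets` here matches the default `DATA_DIR`:
```python
import os

DATA_DIR = 'datasets'  # default DATA_DIR in ssd/config/path_catlog.py

# Pascal VOC: each year needs images, annotations and the split lists
for year in ('VOC2007', 'VOC2012'):
    for sub in ('JPEGImages', 'Annotations', 'ImageSets'):
        path = os.path.join(DATA_DIR, year, sub)
        print(path, 'OK' if os.path.isdir(path) else 'MISSING')

# COCO: annotation JSONs plus the image folders
for name in ('annotations', 'train2014', 'val2014'):
    path = os.path.join(DATA_DIR, name)
    print(path, 'OK' if os.path.isdir(path) else 'MISSING')
```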
### Single-DCU Training
```bash
export LD_LIBRARY_PATH=/opt/hayhal/hydm/lib:/opt/hayhal/lib:/opt/dtk-24.04.1/.hyhal/hydm/lib:$LD_LIBRARY_PATH
python train.py --config-file configs/vgg_ssd300_voc0712.yaml
```
### Multi-DCU Training
```bash
export NGPUS=4
python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml SOLVER.WARMUP_FACTOR 0.03333 SOLVER.WARMUP_ITERS 1000
```
## Evaluation
### Single-DCU Evaluation
```bash
python test.py --config-file configs/vgg_ssd300_voc0712.yaml
```
### Multi-DCU Evaluation
```bash
export NGPUS=4
python -m torch.distributed.launch --nproc_per_node=$NGPUS test.py --config-file configs/vgg_ssd300_voc0712.yaml
```
## Model Zoo
### COCO:
| Backbone | Input Size | box AP | Model Size | Download |
| :------------: | :----------:| :--------------------------: | :--------: | :-------: |
| VGG16 | 300 | 25.2 | 262MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_coco_trainval35k.pth) |
| VGG16 | 512 | 29.0 | 275MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_coco_trainval35k.pth) |
### PASCAL VOC:
| Backbone | Input Size | mAP | Model Size | Download |
| :--------------: | :----------:| :--------------------------: | :--------: | :-------: |
| VGG16 | 300 | 77.7 | 201MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd300_voc0712.pth) |
| VGG16 | 512 | 80.7 | 207MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/vgg_ssd512_voc0712.pth) |
| Mobilenet V2 | 320 | 68.9 | 25.5MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v2_ssd320_voc0712_v2.pth) |
| Mobilenet V3 | 320 | 69.5 | 29.9MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/mobilenet_v3_ssd320_voc0712.pth) |
| EfficientNet-B3 | 300 | 73.9 | 97.1MB | [model](https://github.com/lufficc/SSD/releases/download/1.2/efficient_net_b3_ssd300_voc0712.pth) |
## Citation
If you use this project in your research, please cite it:
```text
@misc{lufficc2018ssd,
    author = {Congcong Li},
    title = {{High quality, fast, modular reference implementation of SSD in PyTorch}},
    year = {2018},
    howpublished = {\url{https://github.com/lufficc/SSD}}
}
```
# Troubleshooting
## RuntimeError: merge_sort: failed to synchronize: an illegal memory access was encountered
This happens in the multi-box loss: the sort fails because of NaN values, possibly due to a bug in `log_softmax` (https://github.com/pytorch/pytorch/issues/14335). Three ways to work around it:
1. Use a smaller warmup factor, such as 0.1 (append `SOLVER.WARMUP_FACTOR 0.1` to the end of your train command).
1. Use more warmup iterations, such as 1000 (append `SOLVER.WARMUP_ITERS 1000` to the end of your train command).
1. [The workaround described in the forums by Jinserk Baik](https://discuss.pytorch.org/t/ctcloss-performance-of-pytorch-1-0-0/27524/29).
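
Both warmup workarounds act on the same mechanism: the learning rate is ramped linearly from `base_lr * WARMUP_FACTOR` up to `base_lr` over `WARMUP_ITERS` iterations, so a smaller factor or a longer ramp keeps early updates small enough to avoid the NaNs. A sketch of the usual linear warmup rule (the repo's scheduler may differ in detail):
```python
def warmup_lr(base_lr, iteration, warmup_iters, warmup_factor):
    """Linearly ramp the LR from base_lr * warmup_factor up to base_lr."""
    if iteration >= warmup_iters:
        return base_lr
    alpha = iteration / warmup_iters
    return base_lr * (warmup_factor * (1 - alpha) + alpha)

# with SOLVER.WARMUP_FACTOR 0.1 and SOLVER.WARMUP_ITERS 1000:
print(warmup_lr(1e-3, 0, 1000, 0.1))     # 1e-4: 10x smaller at the start
print(warmup_lr(1e-3, 500, 1000, 0.1))   # 5.5e-4: halfway through the ramp
print(warmup_lr(1e-3, 1000, 1000, 0.1))  # 1e-3: full LR after warmup
```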
MODEL:
  NUM_CLASSES: 21
  BACKBONE:
    NAME: 'efficient_net-b3'
    OUT_CHANNELS: (48, 136, 384, 256, 256, 256)
INPUT:
  IMAGE_SIZE: 300
DATASETS:
  TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
  TEST: ("voc_2007_test", )
SOLVER:
  MAX_ITER: 160000
  LR_STEPS: [105000, 135000]
  GAMMA: 0.1
  BATCH_SIZE: 24
  LR: 1e-3
OUTPUT_DIR: 'outputs/efficient_net_b3_ssd300_voc0712'
MODEL:
  NUM_CLASSES: 21
  BOX_HEAD:
    PREDICTOR: 'SSDLiteBoxPredictor'
  BACKBONE:
    NAME: 'mobilenet_v2'
    OUT_CHANNELS: (96, 1280, 512, 256, 256, 64)
  PRIORS:
    FEATURE_MAPS: [20, 10, 5, 3, 2, 1]
    STRIDES: [16, 32, 64, 107, 160, 320]
    MIN_SIZES: [60, 105, 150, 195, 240, 285]
    MAX_SIZES: [105, 150, 195, 240, 285, 330]
    ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
    BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6]
INPUT:
  IMAGE_SIZE: 320
DATASETS:
  TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
  TEST: ("voc_2007_test", )
SOLVER:
  MAX_ITER: 120000
  LR_STEPS: [80000, 100000]
  GAMMA: 0.1
  BATCH_SIZE: 32
  LR: 1e-3
OUTPUT_DIR: 'outputs/mobilenet_v2_ssd320_voc0712'
MODEL:
  NUM_CLASSES: 21
  BOX_HEAD:
    PREDICTOR: 'SSDLiteBoxPredictor'
  BACKBONE:
    NAME: 'mobilenet_v3'
    OUT_CHANNELS: (112, 960, 512, 256, 256, 64)
  PRIORS:
    FEATURE_MAPS: [20, 10, 5, 3, 2, 1]
    STRIDES: [16, 32, 64, 107, 160, 320]
    MIN_SIZES: [60, 105, 150, 195, 240, 285]
    MAX_SIZES: [105, 150, 195, 240, 285, 330]
    ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
    BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6]
INPUT:
  IMAGE_SIZE: 320
DATASETS:
  TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
  TEST: ("voc_2007_test", )
SOLVER:
  MAX_ITER: 120000
  LR_STEPS: [80000, 100000]
  GAMMA: 0.1
  BATCH_SIZE: 32
  LR: 1e-3
OUTPUT_DIR: 'outputs/mobilenet_v3_ssd320_voc0712'
MODEL:
  NUM_CLASSES: 81
  PRIORS:
    FEATURE_MAPS: [38, 19, 10, 5, 3, 1]
    STRIDES: [8, 16, 32, 64, 100, 300]
    MIN_SIZES: [21, 45, 99, 153, 207, 261]
    MAX_SIZES: [45, 99, 153, 207, 261, 315]
    ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    BOXES_PER_LOCATION: [4, 6, 6, 6, 4, 4]
INPUT:
  IMAGE_SIZE: 300
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival", )
SOLVER:
  MAX_ITER: 400000
  LR_STEPS: [280000, 360000]
  GAMMA: 0.1
  BATCH_SIZE: 32
  LR: 1e-3
OUTPUT_DIR: 'outputs/vgg_ssd300_coco_trainval35k'
MODEL:
  NUM_CLASSES: 21
INPUT:
  IMAGE_SIZE: 300
DATASETS:
  TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
  TEST: ("voc_2007_test", )
SOLVER:
  MAX_ITER: 120000
  LR_STEPS: [80000, 100000]
  GAMMA: 0.1
  BATCH_SIZE: 32
  LR: 1e-3
OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712'
MODEL:
  NUM_CLASSES: 81
  BACKBONE:
    OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256)
  PRIORS:
    FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1]
    STRIDES: [8, 16, 32, 64, 128, 256, 512]
    MIN_SIZES: [20.48, 51.2, 133.12, 215.04, 296.96, 378.88, 460.8]
    MAX_SIZES: [51.2, 133.12, 215.04, 296.96, 378.88, 460.8, 542.72]
    ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
    BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4]
INPUT:
  IMAGE_SIZE: 512
DATASETS:
  TRAIN: ("coco_2014_train", "coco_2014_valminusminival")
  TEST: ("coco_2014_minival", )
SOLVER:
  MAX_ITER: 520000
  LR_STEPS: [360000, 480000]
  GAMMA: 0.1
  BATCH_SIZE: 24
  LR: 1e-3
OUTPUT_DIR: 'outputs/vgg_ssd512_coco_trainval35k'
MODEL:
  NUM_CLASSES: 21
  BACKBONE:
    OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256)
  PRIORS:
    FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1]
    STRIDES: [8, 16, 32, 64, 128, 256, 512]
    MIN_SIZES: [35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8]
    MAX_SIZES: [76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.65]
    ASPECT_RATIOS: [[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]]
    BOXES_PER_LOCATION: [4, 6, 6, 6, 6, 4, 4]
INPUT:
  IMAGE_SIZE: 512
DATASETS:
  TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
  TEST: ("voc_2007_test", )
SOLVER:
  MAX_ITER: 120000
  LR_STEPS: [80000, 100000]
  GAMMA: 0.1
  BATCH_SIZE: 24
  LR: 1e-3
OUTPUT_DIR: 'outputs/vgg_ssd512_voc0712'
import glob
import os
import time

import torch
from PIL import Image
from vizer.draw import draw_boxes

from ssd.config import cfg
from ssd.data.datasets import COCODataset, VOCDataset
import argparse
import numpy as np

from ssd.data.transforms import build_transforms
from ssd.modeling.detector import build_detection_model
from ssd.utils import mkdir
from ssd.utils.checkpoint import CheckPointer


@torch.no_grad()
def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type):
    if dataset_type == "voc":
        class_names = VOCDataset.class_names
    elif dataset_type == 'coco':
        class_names = COCODataset.class_names
    else:
        raise NotImplementedError('Not implemented now.')
    device = torch.device(cfg.MODEL.DEVICE)

    model = build_detection_model(cfg)
    model = model.to(device)
    checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.load(ckpt, use_latest=ckpt is None)
    weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file()
    print('Loaded weights from {}'.format(weight_file))

    image_paths = glob.glob(os.path.join(images_dir, '*.jpg'))
    mkdir(output_dir)

    cpu_device = torch.device("cpu")
    transforms = build_transforms(cfg, is_train=False)
    model.eval()
    for i, image_path in enumerate(image_paths):
        start = time.time()
        image_name = os.path.basename(image_path)

        image = np.array(Image.open(image_path).convert("RGB"))
        height, width = image.shape[:2]
        images = transforms(image)[0].unsqueeze(0)
        load_time = time.time() - start

        start = time.time()
        result = model(images.to(device))[0]
        inference_time = time.time() - start

        result = result.resize((width, height)).to(cpu_device).numpy()
        boxes, labels, scores = result['boxes'], result['labels'], result['scores']

        indices = scores > score_threshold
        boxes = boxes[indices]
        labels = labels[indices]
        scores = scores[indices]
        meters = ' | '.join(
            [
                'objects {:02d}'.format(len(boxes)),
                'load {:03d}ms'.format(round(load_time * 1000)),
                'inference {:03d}ms'.format(round(inference_time * 1000)),
                'FPS {}'.format(round(1.0 / inference_time))
            ]
        )
        print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters))

        drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8)
        Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name))


def main():
    parser = argparse.ArgumentParser(description="SSD Demo.")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.")
    parser.add_argument("--score_threshold", type=float, default=0.7)
    parser.add_argument("--images_dir", default='demo', type=str, help='Specify an image dir to do prediction.')
    parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify an image dir to save predicted images.')
    parser.add_argument("--dataset_type", default="voc", type=str, help='Specify dataset type. Currently support voc and coco.')
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    print(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    print("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        print(config_str)
    print("Running with config:\n{}".format(cfg))

    run_demo(cfg=cfg,
             ckpt=args.ckpt,
             score_threshold=args.score_threshold,
             images_dir=args.images_dir,
             output_dir=args.output_dir,
             dataset_type=args.dataset_type)


if __name__ == '__main__':
    main()