"vscode:/vscode.git/clone" did not exist on "0732b9d2f0fb9a4dd9753bdabe3ddb7d452c49cf"
Unverified commit ac289b35, authored by Tai-Wang and committed by GitHub

Bump version to v1.3.0

parents b0e8ece9 12b595ca
# Copyright (c) OpenMMLab. All rights reserved.
-if '_base_':
+from mmengine import read_base
+
+with read_base():
from .._base_.schedules.cosine import *
from .._base_.default_runtime import *
@@ -293,7 +295,7 @@ test_dataloader = dict(
box_type_3d='LiDAR',
backend_args=backend_args))
-optim_wrapper.merge(
+optim_wrapper.update(
dict(
optimizer=dict(weight_decay=0.01),
clip_grad=dict(max_norm=35, norm_type=2),
......
# Copyright (c) OpenMMLab. All rights reserved.
-if '_base_':
+from mmengine import read_base
+
+with read_base():
from .._base_.datasets.kitti_mono3d import *
from .._base_.models.pgd import *
from .._base_.schedules.mmdet_schedule_1x import *
@@ -19,7 +21,7 @@ from mmdet3d.models.losses.uncertain_smooth_l1_loss import \
from mmdet3d.models.task_modules.coders.pgd_bbox_coder import PGDBBoxCoder
# model settings
-model.merge(
+model.update(
dict(
data_preprocessor=dict(
type=Det3DDataPreprocessor,
@@ -121,13 +123,13 @@ test_pipeline = [
dict(type=Pack3DDetInputs, keys=['img'])
]
-train_dataloader.merge(
+train_dataloader.update(
dict(batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline)))
-test_dataloader.merge(dict(dataset=dict(pipeline=test_pipeline)))
-val_dataloader.merge(dict(dataset=dict(pipeline=test_pipeline)))
+test_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))
+val_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))
# optimizer
-optim_wrapper.merge(
+optim_wrapper.update(
dict(
optimizer=dict(lr=0.001),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
@@ -146,5 +148,5 @@ param_scheduler = [
gamma=0.1)
]
-train_cfg.merge(dict(max_epochs=48, val_interval=2))
-auto_scale_lr.merge(dict(base_batch_size=12))
+train_cfg.update(dict(max_epochs=48, val_interval=2))
+auto_scale_lr.update(dict(base_batch_size=12))
# Copyright (c) OpenMMLab. All rights reserved.
-if '_base_':
+from mmengine import read_base
+
+with read_base():
from .._base_.datasets.scannet_3d import *
from .._base_.models.votenet import *
from .._base_.schedules.schedule_3x import *
@@ -11,7 +13,7 @@ from mmdet3d.models.task_modules.coders.partial_bin_based_bbox_coder import \
PartialBinBasedBBoxCoder
# model settings
-model.merge(
+model.update(
dict(
bbox_head=dict(
num_classes=18,
@@ -39,9 +41,9 @@ model.merge(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]))))
-default_hooks.merge(dict(logger=dict(type=LoggerHook, interval=30)))
+default_hooks.update(dict(logger=dict(type=LoggerHook, interval=30)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
-auto_scale_lr.merge(dict(enable=False, base_batch_size=64))
+auto_scale_lr.update(dict(enable=False, base_batch_size=64))
@@ -1153,7 +1153,6 @@ class MonoDet3DInferencerLoader(BaseTransform):
Added keys:
- img
- cam2img
- box_type_3d
- box_mode_3d
@@ -1176,32 +1175,19 @@ class MonoDet3DInferencerLoader(BaseTransform):
dict: The dict contains loaded image and meta information.
"""
box_type_3d, box_mode_3d = get_box_type('camera')
-        assert 'calib' in single_input and 'img' in single_input, \
-            "key 'calib' and 'img' must be in input dict"
-        if isinstance(single_input['calib'], str):
-            calib_path = single_input['calib']
-            with open(calib_path, 'r') as f:
-                lines = f.readlines()
-            cam2img = np.array([
-                float(info) for info in lines[0].split(' ')[0:16]
-            ]).reshape([4, 4])
-        elif isinstance(single_input['calib'], np.ndarray):
-            cam2img = single_input['calib']
-        else:
-            raise ValueError('Unsupported input calib type: '
-                             f"{type(single_input['calib'])}")
if isinstance(single_input['img'], str):
inputs = dict(
images=dict(
CAM_FRONT=dict(
-                        img_path=single_input['img'], cam2img=cam2img)),
+                        img_path=single_input['img'],
+                        cam2img=single_input['cam2img'])),
box_mode_3d=box_mode_3d,
box_type_3d=box_type_3d)
elif isinstance(single_input['img'], np.ndarray):
inputs = dict(
img=single_input['img'],
-                cam2img=cam2img,
+                cam2img=single_input['cam2img'],
box_type_3d=box_type_3d,
box_mode_3d=box_mode_3d)
else:
@@ -1252,9 +1238,9 @@ class MultiModalityDet3DInferencerLoader(BaseTransform):
dict: The dict contains loaded image, point cloud and meta
information.
"""
-        assert 'points' in single_input and 'img' in single_input and \
-            'calib' in single_input, "key 'points', 'img' and 'calib' must be " \
-            f'in input dict, but got {single_input}'
+        assert 'points' in single_input and 'img' in single_input, \
+            "key 'points', 'img' and must be in input dict," \
+            f'but got {single_input}'
if isinstance(single_input['points'], str):
inputs = dict(
lidar_points=dict(lidar_path=single_input['points']),
......@@ -1283,36 +1269,21 @@ class MultiModalityDet3DInferencerLoader(BaseTransform):
multi_modality_inputs = points_inputs
box_type_3d, box_mode_3d = get_box_type('lidar')
-        if isinstance(single_input['calib'], str):
-            calib = mmengine.load(single_input['calib'])
-        elif isinstance(single_input['calib'], dict):
-            calib = single_input['calib']
-        else:
-            raise ValueError('Unsupported input calib type: '
-                             f"{type(single_input['calib'])}")
-        cam2img = np.asarray(calib['cam2img'], dtype=np.float32)
-        lidar2cam = np.asarray(calib['lidar2cam'], dtype=np.float32)
-        if 'lidar2cam' in calib:
-            lidar2img = np.asarray(calib['lidar2img'], dtype=np.float32)
-        else:
-            lidar2img = cam2img @ lidar2cam
if isinstance(single_input['img'], str):
inputs = dict(
img_path=single_input['img'],
-                cam2img=cam2img,
-                lidar2img=lidar2img,
-                lidar2cam=lidar2cam,
+                cam2img=single_input['cam2img'],
+                lidar2img=single_input['lidar2img'],
+                lidar2cam=single_input['lidar2cam'],
box_mode_3d=box_mode_3d,
box_type_3d=box_type_3d)
elif isinstance(single_input['img'], np.ndarray):
inputs = dict(
img=single_input['img'],
-                cam2img=cam2img,
-                lidar2img=lidar2img,
-                lidar2cam=lidar2cam,
+                cam2img=single_input['cam2img'],
+                lidar2img=single_input['lidar2img'],
+                lidar2cam=single_input['lidar2cam'],
box_type_3d=box_type_3d,
box_mode_3d=box_mode_3d)
else:
......
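With this change, both inferencer loaders read the calibration matrices directly from the input dict instead of parsing a `calib` file. A minimal sketch of the inputs they now expect, with key names taken from the dict accesses above and identity matrices standing in for real calibration:

```python
import numpy as np

# Placeholder inputs (illustrative only; real calls supply actual paths or
# arrays and real calibration matrices).
mono_input = dict(
    img='path/to/image.png',              # or an np.ndarray image
    cam2img=np.eye(4, dtype=np.float32))  # camera projection matrix

multi_modality_input = dict(
    points='path/to/points.bin',          # or an np.ndarray point cloud
    img='path/to/image.png',
    cam2img=np.eye(4, dtype=np.float32),
    lidar2img=np.eye(4, dtype=np.float32),
    lidar2cam=np.eye(4, dtype=np.float32))
```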
@@ -2604,26 +2604,29 @@ class LaserMix(BaseTransform):
points = input_dict['points']
pts_semantic_mask = input_dict['pts_semantic_mask']
+        # convert angle to radian
+        pitch_angle_down = self.pitch_angles[0] / 180 * np.pi
+        pitch_angle_up = self.pitch_angles[1] / 180 * np.pi
rho = torch.sqrt(points.coord[:, 0]**2 + points.coord[:, 1]**2)
pitch = torch.atan2(points.coord[:, 2], rho)
-        pitch = torch.clamp(pitch, self.pitch_angles[0] + 1e-5,
-                            self.pitch_angles[1] - 1e-5)
+        pitch = torch.clamp(pitch, pitch_angle_down + 1e-5,
+                            pitch_angle_up - 1e-5)
mix_rho = torch.sqrt(mix_points.coord[:, 0]**2 +
mix_points.coord[:, 1]**2)
mix_pitch = torch.atan2(mix_points.coord[:, 2], mix_rho)
-        mix_pitch = torch.clamp(mix_pitch, self.pitch_angles[0] + 1e-5,
-                                self.pitch_angles[1] - 1e-5)
+        mix_pitch = torch.clamp(mix_pitch, pitch_angle_down + 1e-5,
+                                pitch_angle_up - 1e-5)
num_areas = np.random.choice(self.num_areas, size=1)[0]
-        angle_list = np.linspace(self.pitch_angles[1], self.pitch_angles[0],
+        angle_list = np.linspace(pitch_angle_up, pitch_angle_down,
num_areas + 1)
out_points = []
out_pts_semantic_mask = []
for i in range(num_areas):
-            # convert angle to radian
-            start_angle = angle_list[i + 1] / 180 * np.pi
-            end_angle = angle_list[i] / 180 * np.pi
+            start_angle = angle_list[i + 1]
+            end_angle = angle_list[i]
if i % 2 == 0: # pick from original point cloud
idx = (pitch > start_angle) & (pitch <= end_angle)
out_points.append(points[idx])
......
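The change above hoists the degree-to-radian conversion in `LaserMix` so that the clamp bounds and the `linspace` boundaries are expressed in radians, matching the `pitch` values computed with `atan2`. For example, assuming the common `pitch_angles=[-25, 3]` in degrees (a typical value, not taken from this diff):

```python
import numpy as np

# Degrees -> radians, as now done once at the top of the transform.
pitch_angle_down = -25 / 180 * np.pi  # about -0.4363 rad
pitch_angle_up = 3 / 180 * np.pi      # about  0.0524 rad
```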
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import math
import os
import sys
import time
from typing import List, Optional, Sequence, Tuple, Union
@@ -155,7 +156,7 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
if hasattr(self, 'pcd'):
del self.pcd
-    def _initialize_o3d_vis(self) -> Visualizer:
+    def _initialize_o3d_vis(self, show=True) -> Visualizer:
"""Initialize open3d vis according to frame_cfg.
Args:
@@ -176,8 +177,9 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
o3d_vis.register_key_action_callback(glfw_key_space,
self.space_action_callback)
o3d_vis.register_key_callback(glfw_key_right, self.right_callback)
-        o3d_vis.create_window()
-        self.view_control = o3d_vis.get_view_control()
+        if os.environ.get('DISPLAY', None) is not None and show:
+            o3d_vis.create_window()
+            self.view_control = o3d_vis.get_view_control()
return o3d_vis
@master_only
@@ -859,6 +861,9 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
self.view_port)
self.flag_exit = not self.o3d_vis.poll_events()
self.o3d_vis.update_renderer()
+            # if not hasattr(self, 'view_control'):
+            # self.o3d_vis.create_window()
+            # self.view_control = self.o3d_vis.get_view_control()
self.view_port = \
self.view_control.convert_to_pinhole_camera_parameters() # noqa: E501
if wait_time != -1:
@@ -976,7 +981,7 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
# For object detection datasets, no palette is saved
palette = self.dataset_meta.get('palette', None)
ignore_index = self.dataset_meta.get('ignore_index', None)
-        if ignore_index is not None and 'gt_pts_seg' in data_sample and vis_task == 'lidar_seg':  # noqa: E501
+        if vis_task == 'lidar_seg' and ignore_index is not None and 'pts_semantic_mask' in data_sample.gt_pts_seg:  # noqa: E501
keep_index = data_sample.gt_pts_seg.pts_semantic_mask != ignore_index # noqa: E501
else:
keep_index = None
@@ -986,6 +991,12 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
gt_img_data = None
pred_img_data = None
+        if not hasattr(self, 'o3d_vis') and vis_task in [
+                'multi-view_det', 'lidar_det', 'lidar_seg',
+                'multi-modality_det'
+        ]:
+            self.o3d_vis = self._initialize_o3d_vis(show=show)
if draw_gt and data_sample is not None:
if 'gt_instances_3d' in data_sample:
gt_data_3d = self._draw_instances_3d(
@@ -1083,6 +1094,7 @@ class Det3DLocalVisualizer(DetLocalVisualizer):
if drawn_img_3d is not None:
mmcv.imwrite(drawn_img_3d[..., ::-1], out_file)
if drawn_img is not None:
-                mmcv.imwrite(drawn_img[..., ::-1], out_file)
+                mmcv.imwrite(drawn_img[..., ::-1],
+                             out_file[:-4] + '_2d' + out_file[-4:])
else:
self.add_image(name, drawn_img_3d, step)
@@ -34,7 +34,7 @@ python projects/BEVFusion/setup.py develop
Run a demo on NuScenes data using [BEVFusion model](https://drive.google.com/file/d/1QkvbYDk4G2d6SZoeJqish13qSyXA4lp3/view?usp=share_link):
```shell
-python demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
+python projects/BEVFusion/demo/multi_modality_demo.py demo/data/nuscenes/n015-2018-07-24-11-22-45+0800__LIDAR_TOP__1532402927647951.pcd.bin demo/data/nuscenes/ demo/data/nuscenes/n015-2018-07-24-11-22-45+0800.pkl projects/BEVFusion/configs/bevfusion_lidar-cam_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py ${CHECKPOINT_FILE} --cam-type all --score-thr 0.2 --show
```
### Training commands
......
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
import mmcv
from mmdet3d.apis import inference_multi_modality_detector, init_model
from mmdet3d.registry import VISUALIZERS
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('img', help='image file')
parser.add_argument('ann', help='ann file')
parser.add_argument('config', help='Config file')
parser.add_argument('checkpoint', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--cam-type',
type=str,
default='CAM_FRONT',
help='choose camera type to inference')
parser.add_argument(
'--score-thr', type=float, default=0.0, help='bbox score threshold')
parser.add_argument(
'--out-dir', type=str, default='demo', help='dir to save results')
parser.add_argument(
'--show',
action='store_true',
help='show online visualization results')
parser.add_argument(
'--snapshot',
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = model.dataset_meta
# test a single image and point cloud sample
result, data = inference_multi_modality_detector(model, args.pcd, args.img,
args.ann, args.cam_type)
points = data['inputs']['points']
if isinstance(result.img_path, list):
img = []
for img_path in result.img_path:
single_img = mmcv.imread(img_path)
single_img = mmcv.imconvert(single_img, 'bgr', 'rgb')
img.append(single_img)
else:
img = mmcv.imread(result.img_path)
img = mmcv.imconvert(img, 'bgr', 'rgb')
data_input = dict(points=points, img=img)
# show the results
visualizer.add_datasample(
'result',
data_input,
data_sample=result,
draw_gt=False,
show=args.show,
wait_time=-1,
out_file=args.out_dir,
pred_score_thr=args.score_thr,
vis_task='multi-modality_det')
if __name__ == '__main__':
args = parse_args()
main(args)
# CENet: Toward Concise and Efficient LiDAR Semantic Segmentation for Autonomous Driving
> [CENet: Toward Concise and Efficient LiDAR Semantic Segmentation for Autonomous Driving](https://arxiv.org/abs/2207.12691)
<!-- [ALGORITHM] -->
## Abstract
Accurate and fast scene understanding is one of the challenging tasks for autonomous driving, which requires taking full advantage of LiDAR point clouds for semantic segmentation. In this paper, we present a concise and efficient image-based semantic segmentation network, named CENet. In order to improve the descriptive power of learned features and reduce the computational as well as time complexity, our CENet integrates convolutions with larger kernel size instead of MLP, carefully selected activation functions, and multiple auxiliary segmentation heads with corresponding loss functions into its architecture. Quantitative and qualitative experiments conducted on publicly available benchmarks, SemanticKITTI and SemanticPOSS, demonstrate that our pipeline achieves much better mIoU and inference performance compared with state-of-the-art models. The code will be available at https://github.com/huixiancheng/CENet.
<div align=center>
<img src="https://github.com/open-mmlab/mmdetection3d/assets/55445986/2c268392-0e0c-4e93-bb9d-dc3417c56dad" width="800"/>
</div>
## Introduction
We implement CENet and provide the results and pretrained checkpoints on the SemanticKITTI dataset.
## Usage
<!-- For a typical model, this section should contain the commands for training and testing. You are also suggested to dump your environment specification to env.yml by `conda env export > env.yml`. -->
### Training commands
In MMDetection3D's root directory, run the following command to train the model:
```bash
python tools/train.py projects/CENet/configs/cenet-64x512_4xb4_semantickitti.py
```
For multi-gpu training, run:
```bash
python -m torch.distributed.launch --nnodes=1 --node_rank=0 --nproc_per_node=${NUM_GPUS} --master_port=29506 --master_addr="127.0.0.1" tools/train.py projects/CENet/configs/cenet-64x512_4xb4_semantickitti.py
```
### Testing commands
In MMDetection3D's root directory, run the following command to test the model:
```bash
python tools/test.py projects/CENet/configs/cenet-64x512_4xb4_semantickitti.py ${CHECKPOINT_PATH}
```
## Results and models
### SemanticKITTI
| Backbone | Input resolution | Mem (GB) | Inf time (fps) | mIoU | Download |
| :----------------------------------------------------: | :--------------: | :------: | :------------: | :---: | :----------------------: |
| [CENet](./configs/cenet-64x512_4xb4_semantickitti.py) | 64\*512 | | 41.7 | 61.10 | [model](<>) \| [log](<>) |
| [CENet](./configs/cenet-64x1024_4xb4_semantickitti.py) | 64\*1024 | | 26.8 | 62.20 | [model](<>) \| [log](<>) |
| [CENet](./configs/cenet-64x2048_4xb4_semantickitti.py) | 64\*2048 | | 14.1 | 62.64 | [model](<>) \| [log](<>) |
**Note**
- We report point-based mIoU instead of range-view-based mIoU.
- The reported mIoU is the best result obtained by running inference after each training epoch, which is consistent with the official code.
- If your settings differ from ours, we strongly suggest enabling `auto_scale_lr` to achieve comparable results; see the example config below.
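For example, a minimal override config enabling automatic LR scaling could look like the sketch below. The `base_batch_size` of 16 assumes the 4 GPUs x 4 samples-per-GPU setting implied by the config names; set it to your own reference batch size.

```python
# Hypothetical override config (a sketch, not an official config file).
_base_ = ['./cenet-64x512_4xb4_semantickitti.py']

# Scale the learning rate in proportion to the actual total batch size.
auto_scale_lr = dict(enable=True, base_batch_size=16)
```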
## Citation
```latex
@inproceedings{cheng2022cenet,
  title={Cenet: Toward Concise and Efficient Lidar Semantic Segmentation for Autonomous Driving},
  author={Cheng, Hui-Xian and Han, Xian-Feng and Xiao, Guo-Qiang},
  booktitle={2022 IEEE International Conference on Multimedia and Expo (ICME)},
  pages={01--06},
  year={2022},
  organization={IEEE}
}
```
## Checklist
<!-- Here is a checklist illustrating a usual development workflow of a successful project, and also serves as an overview of this project's progress. The PIC (person in charge) or contributors of this project should check all the items that they believe have been finished, which will further be verified by codebase maintainers via a PR.
OpenMMLab's maintainer will review the code to ensure the project's quality. Reaching the first milestone means that this project suffices the minimum requirement of being merged into 'projects/'. But this project is only eligible to become a part of the core package upon attaining the last milestone.
Note that keeping this section up-to-date is crucial not only for this project's developers but the entire community, since there might be some other contributors joining this project and deciding their starting point from this list. It also helps maintainers accurately estimate time and effort on further code polishing, if needed.
A project does not necessarily have to be finished in a single PR, but it's essential for the project to at least reach the first milestone in its very first PR. -->
- [x] Milestone 1: PR-ready, and acceptable to be one of the `projects/`.
- [x] Finish the code
<!-- The code's design shall follow existing interfaces and convention. For example, each model component should be registered into `mmdet3d.registry.MODELS` and configurable via a config file. -->
- [x] Basic docstrings & proper citation
<!-- Each major object should contain a docstring, describing its functionality and arguments. If you have adapted the code from other open-source projects, don't forget to cite the source project in docstring and make sure your behavior is not against its license. Typically, we do not accept any code snippet under GPL license. [A Short Guide to Open Source Licenses](https://medium.com/nationwide-technology/a-short-guide-to-open-source-licenses-cf5b1c329edd) -->
- [x] Test-time correctness
<!-- If you are reproducing the result from a paper, make sure your model's inference-time performance matches that in the original paper. The weights usually could be obtained by simply renaming the keys in the official pre-trained weights. This test could be skipped though, if you are able to prove the training-time correctness and check the second milestone. -->
- [x] A full README
<!-- As this template does. -->
- [x] Milestone 2: Indicates a successful model implementation.
- [x] Training-time correctness
<!-- If you are reproducing the result from a paper, checking this item means that you should have trained your model from scratch based on the original paper's specification and verified that the final result matches the report within a minor error range. -->
- [ ] Milestone 3: Good to be a part of our core package!
- [ ] Type hints and docstrings
<!-- Ideally *all* the methods should have [type hints](https://www.pythontutorial.net/python-basics/python-type-hints/) and [docstrings](https://google.github.io/styleguide/pyguide.html#381-docstrings). [Example](https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/mmdet3d/models/detectors/fcos_mono3d.py) -->
- [ ] Unit tests
<!-- Unit tests for each module are required. [Example](https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/tests/test_models/test_dense_heads/test_fcos_mono3d_head.py) -->
- [ ] Code polishing
<!-- Refactor your code according to reviewer's comment. -->
- [ ] Metafile.yml
<!-- It will be parsed by MIM and Inferencer. [Example](https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/configs/fcos3d/metafile.yml) -->
- [ ] Move your modules into the core package following the codebase's file hierarchy structure.
<!-- In particular, you may have to refactor this README into a standard one. [Example](/configs/textdet/dbnet/README.md) -->
- [ ] Refactor your modules into the core package following the codebase's file hierarchy structure.
# Copyright (c) OpenMMLab. All rights reserved.
from .boundary_loss import BoundaryLoss
from .cenet_backbone import CENet
from .range_image_head import RangeImageHead
from .range_image_segmentor import RangeImageSegmentor
from .transforms_3d import SemkittiRangeView
__all__ = [
'CENet', 'RangeImageHead', 'RangeImageSegmentor', 'SemkittiRangeView',
'BoundaryLoss'
]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch import Tensor, nn
from torch.nn import functional as F
from mmdet3d.registry import MODELS
def one_hot(label: Tensor,
n_classes: int,
requires_grad: bool = True) -> Tensor:
"""Return One Hot Label."""
device = label.device
one_hot_label = torch.eye(
n_classes, device=device, requires_grad=requires_grad)[label]
one_hot_label = one_hot_label.transpose(1, 3).transpose(2, 3)
return one_hot_label
@MODELS.register_module()
class BoundaryLoss(nn.Module):
"""Boundary loss."""
def __init__(self, theta0=3, theta=5, loss_weight: float = 1.0) -> None:
super(BoundaryLoss, self).__init__()
self.theta0 = theta0
self.theta = theta
self.loss_weight = loss_weight
def forward(self, pred: Tensor, gt: Tensor) -> Tensor:
"""Forward function.
Args:
pred (Tensor): The output from model.
gt (Tensor): Ground truth map.
Returns:
Tensor: Loss tensor.
"""
pred = F.softmax(pred, dim=1)
n, c, _, _ = pred.shape
# one-hot vector of ground truth
one_hot_gt = one_hot(gt, c)
# boundary map
gt_b = F.max_pool2d(
1 - one_hot_gt,
kernel_size=self.theta0,
stride=1,
padding=(self.theta0 - 1) // 2)
gt_b -= 1 - one_hot_gt
pred_b = F.max_pool2d(
1 - pred,
kernel_size=self.theta0,
stride=1,
padding=(self.theta0 - 1) // 2)
pred_b -= 1 - pred
gt_b = gt_b.view(n, c, -1)
pred_b = pred_b.view(n, c, -1)
# Precision, Recall
P = torch.sum(pred_b * gt_b, dim=2) / (torch.sum(pred_b, dim=2) + 1e-7)
R = torch.sum(pred_b * gt_b, dim=2) / (torch.sum(gt_b, dim=2) + 1e-7)
# Boundary F1 Score
BF1 = 2 * P * R / (P + R + 1e-7)
# summing BF1 Score for each class and average over mini-batch
loss = torch.mean(1 - BF1)
return self.loss_weight * loss
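A minimal usage sketch of this loss; the class count and range-image resolution below are illustrative assumptions, not values from this project:

```python
import torch

# 20 classes on a 64x512 range image; inputs are random.
loss_fn = BoundaryLoss(theta0=3, theta=5, loss_weight=1.0)
pred = torch.randn(2, 20, 64, 512)        # (N, C, H, W) raw logits
gt = torch.randint(0, 20, (2, 64, 512))   # (N, H, W) integer labels
loss = loss_fn(pred, gt)                  # scalar tensor
```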
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional, Sequence, Tuple
import torch
from mmcv.cnn import (ConvModule, build_activation_layer, build_conv_layer,
build_norm_layer)
from mmengine.model import BaseModule
from torch import Tensor, nn
from torch.nn import functional as F
from mmdet3d.registry import MODELS
from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
class BasicBlock(BaseModule):
def __init__(self,
inplanes: int,
planes: int,
stride: int = 1,
dilation: int = 1,
downsample: Optional[nn.Module] = None,
conv_cfg: OptConfigType = None,
norm_cfg: ConfigType = dict(type='BN'),
act_cfg: ConfigType = dict(type='LeakyReLU'),
init_cfg: OptMultiConfig = None) -> None:
super(BasicBlock, self).__init__(init_cfg)
self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)
self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)
self.conv1 = build_conv_layer(
conv_cfg,
inplanes,
planes,
3,
stride=stride,
padding=dilation,
dilation=dilation,
bias=False)
self.add_module(self.norm1_name, norm1)
self.conv2 = build_conv_layer(
conv_cfg, planes, planes, 3, padding=1, bias=False)
self.add_module(self.norm2_name, norm2)
self.relu = build_activation_layer(act_cfg)
self.downsample = downsample
@property
def norm1(self) -> nn.Module:
"""nn.Module: normalization layer after the first convolution layer."""
return getattr(self, self.norm1_name)
@property
def norm2(self) -> nn.Module:
"""nn.Module: normalization layer after the second convolution layer.
"""
return getattr(self, self.norm2_name)
def forward(self, x: Tensor) -> Tensor:
identity = x
out = self.conv1(x)
out = self.norm1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.norm2(out)
if self.downsample is not None:
identity = self.downsample(x)
out += identity
out = self.relu(out)
return out
@MODELS.register_module()
class CENet(BaseModule):
def __init__(self,
in_channels: int = 5,
stem_channels: int = 128,
num_stages: int = 4,
stage_blocks: Sequence[int] = (3, 4, 6, 3),
out_channels: Sequence[int] = (128, 128, 128, 128),
strides: Sequence[int] = (1, 2, 2, 2),
dilations: Sequence[int] = (1, 1, 1, 1),
fuse_channels: Sequence[int] = (256, 128),
conv_cfg: OptConfigType = None,
norm_cfg: ConfigType = dict(type='BN'),
act_cfg: ConfigType = dict(type='LeakyReLU'),
init_cfg=None) -> None:
super(CENet, self).__init__(init_cfg)
assert len(stage_blocks) == len(out_channels) == len(strides) == len(
dilations) == num_stages, \
'The length of stage_blocks, out_channels, strides and ' \
'dilations should be equal to num_stages'
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self._make_stem_layer(in_channels, stem_channels)
inplanes = stem_channels
self.res_layers = []
for i, num_blocks in enumerate(stage_blocks):
stride = strides[i]
dilation = dilations[i]
planes = out_channels[i]
res_layer = self.make_res_layer(
inplanes=inplanes,
planes=planes,
num_blocks=num_blocks,
stride=stride,
dilation=dilation,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
inplanes = planes
layer_name = f'layer{i + 1}'
self.add_module(layer_name, res_layer)
self.res_layers.append(layer_name)
in_channels = stem_channels + sum(out_channels)
self.fuse_layers = []
for i, fuse_channel in enumerate(fuse_channels):
fuse_layer = ConvModule(
in_channels,
fuse_channel,
kernel_size=3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg)
in_channels = fuse_channel
layer_name = f'fuse_layer{i + 1}'
self.add_module(layer_name, fuse_layer)
self.fuse_layers.append(layer_name)
def _make_stem_layer(self, in_channels: int, out_channels: int) -> None:
self.stem = nn.Sequential(
build_conv_layer(
self.conv_cfg,
in_channels,
out_channels // 2,
kernel_size=3,
padding=1,
bias=False),
build_norm_layer(self.norm_cfg, out_channels // 2)[1],
build_activation_layer(self.act_cfg),
build_conv_layer(
self.conv_cfg,
out_channels // 2,
out_channels,
kernel_size=3,
padding=1,
bias=False),
build_norm_layer(self.norm_cfg, out_channels)[1],
build_activation_layer(self.act_cfg),
build_conv_layer(
self.conv_cfg,
out_channels,
out_channels,
kernel_size=3,
padding=1,
bias=False),
build_norm_layer(self.norm_cfg, out_channels)[1],
build_activation_layer(self.act_cfg))
def make_res_layer(
self,
inplanes: int,
planes: int,
num_blocks: int,
stride: int,
dilation: int,
conv_cfg: OptConfigType = None,
norm_cfg: ConfigType = dict(type='BN'),
act_cfg: ConfigType = dict(type='LeakyReLU')
) -> nn.Sequential:
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
build_conv_layer(
conv_cfg,
inplanes,
planes,
kernel_size=1,
stride=stride,
bias=False),
build_norm_layer(norm_cfg, planes)[1])
layers = []
layers.append(
BasicBlock(
inplanes=inplanes,
planes=planes,
stride=stride,
dilation=dilation,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg))
inplanes = planes
for _ in range(1, num_blocks):
layers.append(
BasicBlock(
inplanes=inplanes,
planes=planes,
stride=1,
dilation=dilation,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg))
return nn.Sequential(*layers)
def forward(self, x: Tensor) -> Tuple[Tensor]:
x = self.stem(x)
outs = [x]
for layer_name in self.res_layers:
res_layer = getattr(self, layer_name)
x = res_layer(x)
outs.append(x)
# TODO: move the following operation into neck.
for i in range(len(outs)):
if outs[i].shape != outs[0].shape:
outs[i] = F.interpolate(
outs[i],
size=outs[0].size()[2:],
mode='bilinear',
align_corners=True)
outs[0] = torch.cat(outs, dim=1)
for layer_name in self.fuse_layers:
fuse_layer = getattr(self, layer_name)
outs[0] = fuse_layer(outs[0])
return tuple(outs)
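A quick shape check of the backbone (a sketch; the batch size and 64x512 resolution are illustrative). With the default strides, each stage output is upsampled back to the stem resolution and concatenated, and `outs[0]` becomes the fused feature map that `RangeImageHead` consumes with its default `indices=0`:

```python
import torch

backbone = CENet(in_channels=5, stem_channels=128)
x = torch.randn(2, 5, 64, 512)  # (range, x, y, z, intensity) channels
outs = backbone(x)
# outs[0]: fused feature map of shape (2, 128, 64, 512); outs[1:] are the
# per-stage features interpolated back to the same resolution.
```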
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
import torch
from torch import Tensor, nn
from mmdet3d.models import Base3DDecodeHead
from mmdet3d.registry import MODELS
from mmdet3d.structures.det3d_data_sample import SampleList
from mmdet3d.utils import ConfigType, OptConfigType
@MODELS.register_module()
class RangeImageHead(Base3DDecodeHead):
"""RangeImage decoder head.
Args:
loss_ce (dict or :obj:`ConfigDict`): Config of CrossEntropy loss.
Defaults to dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0).
loss_lovasz (dict or :obj:`ConfigDict`, optional): Config of Lovasz
loss. Defaults to None.
        loss_boundary (dict or :obj:`ConfigDict`, optional): Config of
            boundary loss. Defaults to None.
        indices (int): The index of the feature to use. Defaults to 0.
"""
def __init__(self,
loss_ce: ConfigType = dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
loss_lovasz: OptConfigType = None,
loss_boundary: OptConfigType = None,
indices: int = 0,
**kwargs) -> None:
super(RangeImageHead, self).__init__(**kwargs)
self.loss_ce = MODELS.build(loss_ce)
if loss_lovasz is not None:
self.loss_lovasz = MODELS.build(loss_lovasz)
else:
self.loss_lovasz = None
if loss_boundary is not None:
self.loss_boundary = MODELS.build(loss_boundary)
else:
self.loss_boundary = None
self.indices = indices
def build_conv_seg(self, channels: int, num_classes: int,
kernel_size: int) -> nn.Module:
return nn.Conv2d(channels, num_classes, kernel_size=kernel_size)
def forward(self, feats: Tuple[Tensor]) -> Tensor:
"""Forward function."""
seg_logit = self.cls_seg(feats[self.indices])
return seg_logit
def _stack_batch_gt(self, batch_data_samples: SampleList) -> Tensor:
gt_semantic_segs = [
data_sample.gt_pts_seg.semantic_seg
for data_sample in batch_data_samples
]
return torch.stack(gt_semantic_segs, dim=0)
def loss_by_feat(self, seg_logit: Tensor,
batch_data_samples: SampleList) -> dict:
"""Compute semantic segmentation loss.
Args:
seg_logit (Tensor): Predicted logits.
batch_data_samples (List[:obj:`Det3DDataSample`]): The seg
data samples. It usually includes information such
as `metainfo` and `gt_pts_seg`.
Returns:
Dict[str, Tensor]: A dictionary of loss components.
"""
seg_label = self._stack_batch_gt(batch_data_samples)
seg_label = seg_label.squeeze(dim=1)
loss = dict()
loss['loss_ce'] = self.loss_ce(
seg_logit, seg_label, ignore_index=self.ignore_index)
if self.loss_lovasz:
loss['loss_lovasz'] = self.loss_lovasz(
seg_logit, seg_label, ignore_index=self.ignore_index)
if self.loss_boundary:
loss['loss_boundary'] = self.loss_boundary(seg_logit, seg_label)
return loss
def predict(self, inputs: Tuple[Tensor], batch_input_metas: List[dict],
test_cfg: ConfigType) -> torch.Tensor:
"""Forward function for testing.
Args:
inputs (Tuple[Tensor]): Features from backbone.
batch_input_metas (List[:obj:`Det3DDataSample`]): The det3d data
samples. It usually includes information such as `metainfo` and
`gt_pts_seg`. We use `point2voxel_map` in this function.
test_cfg (dict or :obj:`ConfigDict`): The testing config.
Returns:
List[Tensor]: List of point-wise segmentation labels.
"""
seg_logits = self.forward(inputs)
seg_labels = seg_logits.argmax(dim=1)
device = seg_logits.device
use_knn = test_cfg.get('use_knn', False)
if use_knn:
from .utils import KNN
post_module = KNN(
test_cfg=test_cfg,
num_classes=self.num_classes,
ignore_index=self.ignore_index)
seg_label_list = []
for i in range(len(batch_input_metas)):
input_metas = batch_input_metas[i]
proj_x = torch.tensor(
input_metas['proj_x'], dtype=torch.int64, device=device)
proj_y = torch.tensor(
input_metas['proj_y'], dtype=torch.int64, device=device)
proj_range = torch.tensor(
input_metas['proj_range'], dtype=torch.float32, device=device)
unproj_range = torch.tensor(
input_metas['unproj_range'],
dtype=torch.float32,
device=device)
if use_knn:
seg_label_list.append(
post_module(proj_range, unproj_range, seg_labels[i],
proj_x, proj_y))
else:
seg_label_list.append(seg_labels[i, proj_y, proj_x])
return seg_label_list
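A hedged example of the test-time config consumed above: `use_knn` is read in `predict`, and the remaining keys match the attributes read in `KNN.__init__`. The numeric values are illustrative, not taken from this commit; in a full config, `test_cfg` is wrapped so that attribute access such as `test_cfg.knn` works:

```python
# Illustrative test_cfg enabling KNN-based post-processing of range-view
# predictions.
test_cfg = dict(use_knn=True, knn=7, search=7, sigma=1.0, cutoff=2.0)
```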
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List
from torch import Tensor
from mmdet3d.models import EncoderDecoder3D
from mmdet3d.registry import MODELS
from mmdet3d.structures import PointData
from mmdet3d.structures.det3d_data_sample import OptSampleList, SampleList
@MODELS.register_module()
class RangeImageSegmentor(EncoderDecoder3D):
def loss(self, batch_inputs_dict: dict,
batch_data_samples: SampleList) -> Dict[str, Tensor]:
"""Calculate losses from a batch of inputs and data samples.
Args:
batch_inputs_dict (dict): Input sample dict which
includes 'points' and 'imgs' keys.
- points (List[Tensor]): Point cloud of each sample.
- imgs (Tensor, optional): Image tensor has shape (B, C, H, W).
batch_data_samples (List[:obj:`Det3DDataSample`]): The det3d data
samples. It usually includes information such as `metainfo` and
`gt_pts_seg`.
Returns:
Dict[str, Tensor]: A dictionary of loss components.
"""
# extract features using backbone
imgs = batch_inputs_dict['imgs']
x = self.extract_feat(imgs)
losses = dict()
loss_decode = self._decode_head_forward_train(x, batch_data_samples)
losses.update(loss_decode)
if self.with_auxiliary_head:
loss_aux = self._auxiliary_head_forward_train(
x, batch_data_samples)
losses.update(loss_aux)
return losses
def predict(self,
batch_inputs_dict: dict,
batch_data_samples: SampleList,
rescale: bool = True) -> SampleList:
"""Simple test with single scene.
Args:
batch_inputs_dict (dict): Input sample dict which includes 'points'
and 'imgs' keys.
- points (List[Tensor]): Point cloud of each sample.
- imgs (Tensor, optional): Image tensor has shape (B, C, H, W).
batch_data_samples (List[:obj:`Det3DDataSample`]): The det3d data
samples. It usually includes information such as `metainfo` and
`gt_pts_seg`.
rescale (bool): Whether transform to original number of points.
Will be used for voxelization based segmentors.
Defaults to True.
Returns:
List[:obj:`Det3DDataSample`]: Segmentation results of the input
points. Each Det3DDataSample usually contains:
- ``pred_pts_seg`` (PointData): Prediction of 3D semantic
segmentation.
- ``pts_seg_logits`` (PointData): Predicted logits of 3D semantic
segmentation before normalization.
"""
# 3D segmentation requires per-point prediction, so it's impossible
# to use down-sampling to get a batch of scenes with same num_points
# therefore, we only support testing one scene every time
batch_input_metas = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
imgs = batch_inputs_dict['imgs']
x = self.extract_feat(imgs)
seg_labels_list = self.decode_head.predict(x, batch_input_metas,
self.test_cfg)
return self.postprocess_result(seg_labels_list, batch_data_samples)
def _forward(self,
batch_inputs_dict: dict,
batch_data_samples: OptSampleList = None) -> Tensor:
"""Network forward process.
Args:
batch_inputs_dict (dict): Input sample dict which includes 'points'
and 'imgs' keys.
- points (List[Tensor]): Point cloud of each sample.
- imgs (Tensor, optional): Image tensor has shape (B, C, H, W).
batch_data_samples (List[:obj:`Det3DDataSample`]): The det3d data
samples. It usually includes information such as `metainfo` and
`gt_pts_seg`.
Returns:
Tensor: Forward output of model without any post-processes.
"""
imgs = batch_inputs_dict['imgs']
x = self.extract_feat(imgs)
return self.decode_head.forward(x)
def postprocess_result(self, seg_labels_list: List[Tensor],
batch_data_samples: SampleList) -> SampleList:
"""Convert results list to `Det3DDataSample`.
Args:
seg_labels_list (List[Tensor]): List of segmentation results,
seg_logits from model of each input point clouds sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The det3d data
samples. It usually includes information such as `metainfo` and
`gt_pts_seg`.
Returns:
List[:obj:`Det3DDataSample`]: Segmentation results of the input
points. Each Det3DDataSample usually contains:
- ``pred_pts_seg`` (PointData): Prediction of 3D semantic
segmentation.
- ``pts_seg_logits`` (PointData): Predicted logits of 3D semantic
segmentation before normalization.
"""
for i, seg_pred in enumerate(seg_labels_list):
batch_data_samples[i].set_data(
{'pred_pts_seg': PointData(**{'pts_semantic_mask': seg_pred})})
return batch_data_samples
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Sequence
import numpy as np
from mmcv.transforms import BaseTransform
from mmdet3d.registry import TRANSFORMS
@TRANSFORMS.register_module()
class SemkittiRangeView(BaseTransform):
"""Convert Semantickitti point cloud dataset to range image."""
def __init__(self,
H: int = 64,
W: int = 2048,
fov_up: float = 3.0,
fov_down: float = -25.0,
means: Sequence[float] = (11.71279, -0.1023471, 0.4952,
-1.0545, 0.2877),
stds: Sequence[float] = (10.24, 12.295865, 9.4287, 0.8643,
0.1450),
ignore_index: int = 19) -> None:
self.H = H
self.W = W
self.fov_up = fov_up / 180.0 * np.pi
self.fov_down = fov_down / 180.0 * np.pi
self.fov = abs(self.fov_down) + abs(self.fov_up)
self.means = np.array(means, dtype=np.float32)
self.stds = np.array(stds, dtype=np.float32)
self.ignore_index = ignore_index
def transform(self, results: dict) -> dict:
points_numpy = results['points'].numpy()
proj_image = np.full((self.H, self.W, 5), -1, dtype=np.float32)
proj_idx = np.full((self.H, self.W), -1, dtype=np.int64)
# get depth of all points
depth = np.linalg.norm(points_numpy[:, :3], 2, axis=1)
# get angles of all points
yaw = -np.arctan2(points_numpy[:, 1], points_numpy[:, 0])
pitch = np.arcsin(points_numpy[:, 2] / depth)
# get projection in image coords
proj_x = 0.5 * (yaw / np.pi + 1.0)
proj_y = 1.0 - (pitch + abs(self.fov_down)) / self.fov
# scale to image size using angular resolution
proj_x *= self.W
proj_y *= self.H
# round and clamp for use as index
proj_x = np.floor(proj_x)
proj_x = np.minimum(self.W - 1, proj_x)
proj_x = np.maximum(0, proj_x).astype(np.int64)
proj_y = np.floor(proj_y)
proj_y = np.minimum(self.H - 1, proj_y)
proj_y = np.maximum(0, proj_y).astype(np.int64)
results['proj_x'] = proj_x
results['proj_y'] = proj_y
results['unproj_range'] = depth
# order in decreasing depth
indices = np.arange(depth.shape[0])
order = np.argsort(depth)[::-1]
proj_idx[proj_y[order], proj_x[order]] = indices[order]
proj_image[proj_y[order], proj_x[order], 0] = depth[order]
proj_image[proj_y[order], proj_x[order], 1:] = points_numpy[order]
proj_mask = (proj_idx > 0).astype(np.int32)
results['proj_range'] = proj_image[..., 0]
proj_image = (proj_image -
self.means[None, None, :]) / self.stds[None, None, :]
proj_image = proj_image * proj_mask[..., None].astype(np.float32)
results['img'] = proj_image
if 'pts_semantic_mask' in results:
proj_sem_label = np.full((self.H, self.W),
self.ignore_index,
dtype=np.int64)
proj_sem_label[proj_y[order],
proj_x[order]] = results['pts_semantic_mask'][order]
results['gt_semantic_seg'] = proj_sem_label
return results
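A standalone sketch of this transform on a tiny random cloud. Real pipelines pass the `points` object produced by `LoadPointsFromFile`; the transform only requires that it exposes a `.numpy()` method, so a plain tensor is used here for illustration:

```python
import numpy as np
import torch

# 100 random (x, y, z, intensity) points spread around the sensor.
pts = np.random.rand(100, 4).astype(np.float32)
pts[:, :3] = pts[:, :3] * 20 - 10
out = SemkittiRangeView(H=64, W=512).transform(dict(points=torch.from_numpy(pts)))
# out['img']: (64, 512, 5) normalized range image;
# out['proj_x'] / out['proj_y']: per-point pixel coordinates;
# out['unproj_range']: per-point range used by the KNN post-processing.
```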
# Copyright (c) OpenMMLab. All rights reserved.
import math
import torch
from torch import Tensor, nn
from torch.nn import functional as F
from mmdet3d.utils import ConfigType
def get_gaussian_kernel(kernel_size: int = 3, sigma: int = 2) -> Tensor:
# Create a x, y coordinate grid of shape (kernel_size, kernel_size, 2)
x_coord = torch.arange(kernel_size)
x_grid = x_coord.repeat(kernel_size).view(kernel_size, kernel_size)
y_grid = x_grid.t()
xy_grid = torch.stack([x_grid, y_grid], dim=-1).float()
mean = (kernel_size - 1) / 2.
variance = sigma**2.
# Calculate the 2-dimensional gaussian kernel which is
# the product of two gaussian distributions for two different
# variables (in this case called x and y)
gaussian_kernel = (1. / (2. * math.pi * variance)) * torch.exp(-torch.sum(
(xy_grid - mean)**2., dim=-1) / (2 * variance))
# Make sure sum of values in gaussian kernel equals 1.
gaussian_kernel = gaussian_kernel / torch.sum(gaussian_kernel)
# Reshape to 2d depthwise convolutional weight
gaussian_kernel = gaussian_kernel.view(kernel_size, kernel_size)
return gaussian_kernel
class KNN(nn.Module):
def __init__(self, test_cfg: ConfigType, num_classes: int,
ignore_index: int) -> None:
super(KNN, self).__init__()
self.knn = test_cfg.knn
self.search = test_cfg.search
self.sigma = test_cfg.sigma
self.cutoff = test_cfg.cutoff
self.num_classes = num_classes
self.ignore_index = ignore_index
def forward(self, proj_range: Tensor, unproj_range: Tensor,
proj_argmax: Tensor, px: Tensor, py: Tensor) -> Tensor:
# sizes of projection scan
H, W = proj_range.shape
        # number of points (kept as a 1-element shape for the final view())
        P = unproj_range.shape
# check if size of kernel is odd and complain
if self.search % 2 == 0:
raise ValueError('Nearest neighbor kernel must be odd number')
# calculate padding
pad = int((self.search - 1) / 2)
# unfold neighborhood to get nearest neighbors for each pixel
# (range image)
proj_unfold_k_rang = F.unfold(
proj_range[None, None, ...],
kernel_size=(self.search, self.search),
padding=(pad, pad))
# index with px, py to get ALL the pcld points
idx_list = py * W + px
unproj_unfold_k_rang = proj_unfold_k_rang[:, :, idx_list]
# WARNING, THIS IS A HACK
# Make non valid (<0) range points extremely big so that there is no
# screwing up the nn self.search
unproj_unfold_k_rang[unproj_unfold_k_rang < 0] = float('inf')
# now the matrix is unfolded TOTALLY, replace the middle points with
# the actual range points
center = int(((self.search * self.search) - 1) / 2)
unproj_unfold_k_rang[:, center, :] = unproj_range
# now compare range
k2_distances = torch.abs(unproj_unfold_k_rang - unproj_range)
# make a kernel to weigh the ranges according to distance in (x,y)
# I make this 1 - kernel because I want distances that are close
# in (x,y) to matter more
inv_gauss_k = (1 - get_gaussian_kernel(self.search, self.sigma)).view(
1, -1, 1)
inv_gauss_k = inv_gauss_k.to(proj_range.device).type(proj_range.type())
# apply weighing
k2_distances = k2_distances * inv_gauss_k
# find nearest neighbors
_, knn_idx = k2_distances.topk(
self.knn, dim=1, largest=False, sorted=False)
# do the same unfolding with the argmax
proj_unfold_1_argmax = F.unfold(
proj_argmax[None, None, ...].float(),
kernel_size=(self.search, self.search),
padding=(pad, pad)).long()
unproj_unfold_1_argmax = proj_unfold_1_argmax[:, :, idx_list]
# get the top k logits from the knn at each pixel
knn_argmax = torch.gather(
input=unproj_unfold_1_argmax, dim=1, index=knn_idx)
# fake an invalid argmax of classes + 1 for all cutoff items
if self.cutoff > 0:
knn_distances = torch.gather(
input=k2_distances, dim=1, index=knn_idx)
knn_invalid_idx = knn_distances > self.cutoff
knn_argmax[knn_invalid_idx] = self.num_classes
# now vote
# argmax onehot has an extra class for objects after cutoff
knn_argmax_onehot = torch.zeros(
(1, self.num_classes + 1, P[0]),
device=proj_range.device).type(proj_range.type())
ones = torch.ones_like(knn_argmax).type(proj_range.type())
knn_argmax_onehot = knn_argmax_onehot.scatter_add_(1, knn_argmax, ones)
# now vote (as a sum over the onehot shit)
# (don't let it choose unlabeled OR invalid)
if self.ignore_index == self.num_classes - 1:
knn_argmax_out = knn_argmax_onehot[:, :-2].argmax(dim=1)
elif self.ignore_index == 0:
knn_argmax_out = knn_argmax_onehot[:, 1:-1].argmax(dim=1) + 1
else:
knn_argmax_out = knn_argmax_onehot[:, :-1].argmax(dim=1)
# reshape again
knn_argmax_out = knn_argmax_out.view(P)
return knn_argmax_out
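A self-contained sketch exercising the helpers above on random data; sizes and config values are illustrative assumptions, and `ConfigDict` is used only so that the attribute-style access in `KNN.__init__` works:

```python
import torch
from mmengine.config import ConfigDict

# The Gaussian kernel helper is normalized to sum to 1.
k = get_gaussian_kernel(kernel_size=5, sigma=2)
assert abs(float(k.sum()) - 1.0) < 1e-5

# KNN refinement of per-pixel argmax labels back to the raw points.
H, W, P, num_classes = 64, 512, 1000, 20
post = KNN(
    ConfigDict(knn=7, search=7, sigma=1.0, cutoff=2.0),
    num_classes=num_classes,
    ignore_index=19)
proj_range = torch.rand(H, W)                        # range image
unproj_range = torch.rand(P)                         # per-point range
proj_argmax = torch.randint(0, num_classes, (H, W))  # per-pixel labels
px = torch.randint(0, W, (P,))                       # per-point pixel coords
py = torch.randint(0, H, (P,))
refined = post(proj_range, unproj_range, proj_argmax, px, py)  # shape (P,)
```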
_base_ = ['./cenet-64x512_4xb4_semantickitti.py']
backend_args = None
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(type='PointSample', num_points=0.9),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-3.1415929, 3.1415929],
scale_ratio_range=[0.95, 1.05],
translation_std=[0.1, 0.1, 0.1],
),
dict(
type='SemkittiRangeView',
H=64,
W=1024,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(type='Pack3DDetInputs', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(
type='SemkittiRangeView',
H=64,
W=1024,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=('proj_x', 'proj_y', 'proj_range', 'unproj_range'))
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
_base_ = ['./cenet-64x512_4xb4_semantickitti.py']
backend_args = None
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(type='PointSample', num_points=0.9),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-3.1415929, 3.1415929],
scale_ratio_range=[0.95, 1.05],
translation_std=[0.1, 0.1, 0.1],
),
dict(
type='SemkittiRangeView',
H=64,
W=2048,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(type='Pack3DDetInputs', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(
type='SemkittiRangeView',
H=64,
W=2048,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=('proj_x', 'proj_y', 'proj_range', 'unproj_range'))
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader