"lib/llm/vscode:/vscode.git/clone" did not exist on "e5db9e8634e242ec7965c522ee0bde3034c4a8a0"
Commit df3c64a9 authored by zhiqi-li's avatar zhiqi-li
Browse files

support occupancy prediction

parent bdd98bcb
# Copyright (c) OpenMMLab. All rights reserved.
import math
import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import Sampler
from .sampler import SAMPLER
@SAMPLER.register_module()
class DistributedGroupSampler(Sampler):
"""Sampler that restricts data loading to a subset of the dataset.
It is especially useful in conjunction with
:class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
process can pass a DistributedSampler instance as a DataLoader sampler,
and load a subset of the original dataset that is exclusive to it.
.. note::
Dataset is assumed to be of constant size.
Arguments:
dataset: Dataset used for sampling.
num_replicas (optional): Number of processes participating in
distributed training.
rank (optional): Rank of the current process within num_replicas.
seed (int, optional): random seed used to shuffle the sampler if
``shuffle=True``. This number should be identical across all
processes in the distributed group. Default: 0.
"""
def __init__(self,
dataset,
samples_per_gpu=1,
num_replicas=None,
rank=None,
seed=0):
_rank, _num_replicas = get_dist_info()
if num_replicas is None:
num_replicas = _num_replicas
if rank is None:
rank = _rank
self.dataset = dataset
self.samples_per_gpu = samples_per_gpu
self.num_replicas = num_replicas
self.rank = rank
self.epoch = 0
self.seed = seed if seed is not None else 0
assert hasattr(self.dataset, 'flag')
self.flag = self.dataset.flag
self.group_sizes = np.bincount(self.flag)
self.num_samples = 0
        for size in self.group_sizes:
            self.num_samples += int(
                math.ceil(size * 1.0 / self.samples_per_gpu /
                          self.num_replicas)) * self.samples_per_gpu
self.total_size = self.num_samples * self.num_replicas
def __iter__(self):
# deterministically shuffle based on epoch
g = torch.Generator()
g.manual_seed(self.epoch + self.seed)
indices = []
for i, size in enumerate(self.group_sizes):
if size > 0:
indice = np.where(self.flag == i)[0]
assert len(indice) == size
# add .numpy() to avoid bug when selecting indice in parrots.
# TODO: check whether torch.randperm() can be replaced by
# numpy.random.permutation().
indice = indice[list(
torch.randperm(int(size), generator=g).numpy())].tolist()
extra = int(
math.ceil(
size * 1.0 / self.samples_per_gpu / self.num_replicas)
) * self.samples_per_gpu * self.num_replicas - len(indice)
# pad indice
tmp = indice.copy()
for _ in range(extra // size):
indice.extend(tmp)
indice.extend(tmp[:extra % size])
indices.extend(indice)
assert len(indices) == self.total_size
indices = [
indices[j] for i in list(
torch.randperm(
len(indices) // self.samples_per_gpu, generator=g))
for j in range(i * self.samples_per_gpu, (i + 1) *
self.samples_per_gpu)
]
# subsample
offset = self.num_samples * self.rank
indices = indices[offset:offset + self.num_samples]
assert len(indices) == self.num_samples
return iter(indices)
def __len__(self):
return self.num_samples
def set_epoch(self, epoch):
self.epoch = epoch
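
# Illustrative sketch (not part of the original file): the padding arithmetic
# above, for a single group. With group size 10, samples_per_gpu=2 and
# num_replicas=3, each replica draws ceil(10 / 2 / 3) * 2 = 4 indices, so the
# group is padded from 10 to 12 before being split across ranks.
def _demo_group_padding(size=10, samples_per_gpu=2, num_replicas=3):
    per_rank = int(math.ceil(size / samples_per_gpu / num_replicas)) * samples_per_gpu
    return per_rank, per_rank * num_replicas  # -> (4, 12)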
from mmcv.utils.registry import Registry, build_from_cfg
SAMPLER = Registry('sampler')
def build_sampler(cfg, default_args):
return build_from_cfg(cfg, SAMPLER, default_args)
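
# Example (hypothetical values): building the sampler through the registry,
# mirroring how mmcv builds other components from config dicts.
#   sampler = build_sampler(
#       dict(type='DistributedGroupSampler', samples_per_gpu=2),
#       default_args=dict(dataset=train_dataset, num_replicas=8, rank=0))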
from .vovnet import VoVNet
__all__ = ['VoVNet']
import warnings
from collections import OrderedDict
from mmcv.runner import BaseModule
from mmdet.models.builder import BACKBONES
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.batchnorm import _BatchNorm
VoVNet19_slim_dw_eSE = {
'stem': [64, 64, 64],
'stage_conv_ch': [64, 80, 96, 112],
'stage_out_ch': [112, 256, 384, 512],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
}
VoVNet19_dw_eSE = {
'stem': [64, 64, 64],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
}
VoVNet19_slim_eSE = {
'stem': [64, 64, 128],
'stage_conv_ch': [64, 80, 96, 112],
'stage_out_ch': [112, 256, 384, 512],
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
"dw": False
}
VoVNet19_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": False
}
VoVNet39_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 2, 2],
"eSE": True,
"dw": False
}
VoVNet57_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 4, 3],
"eSE": True,
"dw": False
}
VoVNet99_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 3, 9, 3],
"eSE": True,
"dw": False
}
_STAGE_SPECS = {
"V-19-slim-dw-eSE": VoVNet19_slim_dw_eSE,
"V-19-dw-eSE": VoVNet19_dw_eSE,
"V-19-slim-eSE": VoVNet19_slim_eSE,
"V-19-eSE": VoVNet19_eSE,
"V-39-eSE": VoVNet39_eSE,
"V-57-eSE": VoVNet57_eSE,
"V-99-eSE": VoVNet99_eSE,
}
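
# Illustrative check (not part of the original file): the number in each spec
# name counts weighted layers. A stem of 3 convs plus, per OSA block,
# layer_per_block 3x3 convs and one 1x1 concat conv:
#   3 + sum(block_per_stage) * (layer_per_block + 1)
# e.g. V-39-eSE: 3 + (1 + 1 + 2 + 2) * (5 + 1) = 39.
def _count_weighted_layers(spec):
    return 3 + sum(spec["block_per_stage"]) * (spec["layer_per_block"] + 1)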
def dw_conv3x3(in_channels, out_channels, module_name, postfix, stride=1, kernel_size=3, padding=1):
"""3x3 convolution with padding"""
return [
(
'{}_{}/dw_conv3x3'.format(module_name, postfix),
nn.Conv2d(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=out_channels,
bias=False
)
),
(
'{}_{}/pw_conv1x1'.format(module_name, postfix),
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, groups=1, bias=False)
),
('{}_{}/pw_norm'.format(module_name, postfix), nn.BatchNorm2d(out_channels)),
('{}_{}/pw_relu'.format(module_name, postfix), nn.ReLU(inplace=True)),
]
def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=3, padding=1):
"""3x3 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
nn.Conv2d(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
]
def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1, kernel_size=1, padding=0):
"""1x1 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
nn.Conv2d(
in_channels,
out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
]
class Hsigmoid(nn.Module):
def __init__(self, inplace=True):
super(Hsigmoid, self).__init__()
self.inplace = inplace
def forward(self, x):
return F.relu6(x + 3.0, inplace=self.inplace) / 6.0
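
# Note (illustrative): Hsigmoid computes min(max((x + 3) / 6, 0), 1), a cheap
# piecewise-linear approximation of the sigmoid used by the eSE gate below.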
class eSEModule(nn.Module):
def __init__(self, channel, reduction=4):
super(eSEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.fc = nn.Conv2d(channel, channel, kernel_size=1, padding=0)
self.hsigmoid = Hsigmoid()
def forward(self, x):
input = x
x = self.avg_pool(x)
x = self.fc(x)
x = self.hsigmoid(x)
return input * x
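
# Shape sketch (illustrative): eSE squeezes (N, C, H, W) down to per-channel
# weights of shape (N, C, 1, 1) and rescales the input, so the output shape
# equals the input shape:
#   m = eSEModule(channel=256)
#   y = m(torch.randn(2, 256, 32, 32))  # y.shape == (2, 256, 32, 32)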
class _OSA_module(nn.Module):
def __init__(
self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False
):
super(_OSA_module, self).__init__()
self.identity = identity
self.depthwise = depthwise
self.isReduced = False
self.layers = nn.ModuleList()
in_channel = in_ch
if self.depthwise and in_channel != stage_ch:
self.isReduced = True
self.conv_reduction = nn.Sequential(
OrderedDict(conv1x1(in_channel, stage_ch, "{}_reduction".format(module_name), "0"))
)
for i in range(layer_per_block):
if self.depthwise:
self.layers.append(nn.Sequential(OrderedDict(dw_conv3x3(stage_ch, stage_ch, module_name, i))))
else:
self.layers.append(nn.Sequential(OrderedDict(conv3x3(in_channel, stage_ch, module_name, i))))
in_channel = stage_ch
# feature aggregation
in_channel = in_ch + layer_per_block * stage_ch
self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat")))
self.ese = eSEModule(concat_ch)
def forward(self, x):
identity_feat = x
output = []
output.append(x)
if self.depthwise and self.isReduced:
x = self.conv_reduction(x)
for layer in self.layers:
x = layer(x)
output.append(x)
x = torch.cat(output, dim=1)
xt = self.concat(x)
xt = self.ese(xt)
if self.identity:
xt = xt + identity_feat
return xt
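
# Channel bookkeeping (illustrative): `output` collects the block input plus
# every intermediate feature, so the 1x1 `concat` conv maps
# in_ch + layer_per_block * stage_ch -> concat_ch channels. For stage 2 of
# V-39-eSE that is 128 + 5 * 128 = 768 in, 256 out.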
class _OSA_stage(nn.Sequential):
def __init__(
self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False
):
super(_OSA_stage, self).__init__()
if not stage_num == 2:
self.add_module("Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
if block_per_stage != 1:
SE = False
module_name = f"OSA{stage_num}_1"
self.add_module(
module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)
)
for i in range(block_per_stage - 1):
            if i != block_per_stage - 2:  # keep SE enabled only for the last block of the stage
SE = False
module_name = f"OSA{stage_num}_{i + 2}"
self.add_module(
module_name,
_OSA_module(
concat_ch,
stage_ch,
concat_ch,
layer_per_block,
module_name,
SE,
identity=True,
depthwise=depthwise
),
)
@BACKBONES.register_module()
class VoVNet(BaseModule):
def __init__(self, spec_name, input_ch=3, out_features=None,
frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):
"""
Args:
input_ch(int) : the number of input channel
out_features (list[str]): name of the layers whose outputs should
be returned in forward. Can be anything in "stem", "stage2" ...
"""
super(VoVNet, self).__init__(init_cfg)
self.frozen_stages = frozen_stages
self.norm_eval = norm_eval
if isinstance(pretrained, str):
warnings.warn('DeprecationWarning: pretrained is deprecated, '
'please use "init_cfg" instead')
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
stage_specs = _STAGE_SPECS[spec_name]
stem_ch = stage_specs["stem"]
config_stage_ch = stage_specs["stage_conv_ch"]
config_concat_ch = stage_specs["stage_out_ch"]
block_per_stage = stage_specs["block_per_stage"]
layer_per_block = stage_specs["layer_per_block"]
SE = stage_specs["eSE"]
depthwise = stage_specs["dw"]
self._out_features = out_features
# Stem module
conv_type = dw_conv3x3 if depthwise else conv3x3
stem = conv3x3(input_ch, stem_ch[0], "stem", "1", 2)
stem += conv_type(stem_ch[0], stem_ch[1], "stem", "2", 1)
stem += conv_type(stem_ch[1], stem_ch[2], "stem", "3", 2)
self.add_module("stem", nn.Sequential((OrderedDict(stem))))
current_stirde = 4
self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde}
self._out_feature_channels = {"stem": stem_ch[2]}
stem_out_ch = [stem_ch[2]]
in_ch_list = stem_out_ch + config_concat_ch[:-1]
# OSA stages
self.stage_names = []
for i in range(4): # num_stages
name = "stage%d" % (i + 2) # stage 2 ... stage 5
self.stage_names.append(name)
self.add_module(
name,
_OSA_stage(
in_ch_list[i],
config_stage_ch[i],
config_concat_ch[i],
block_per_stage[i],
layer_per_block,
i + 2,
SE,
depthwise,
),
)
self._out_feature_channels[name] = config_concat_ch[i]
            if i != 0:
                self._out_feature_strides[name] = current_stride = int(current_stride * 2)
# initialize weights
# self._initialize_weights()
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight)
def forward(self, x):
outputs = {}
x = self.stem(x)
if "stem" in self._out_features:
outputs["stem"] = x
for name in self.stage_names:
x = getattr(self, name)(x)
if name in self._out_features:
outputs[name] = x
return outputs
def _freeze_stages(self):
if self.frozen_stages >= 0:
m = getattr(self, 'stem')
m.eval()
for param in m.parameters():
param.requires_grad = False
for i in range(1, self.frozen_stages + 1):
m = getattr(self, f'stage{i+1}')
m.eval()
for param in m.parameters():
param.requires_grad = False
def train(self, mode=True):
"""Convert the model into training mode while keep normalization layer
freezed."""
super(VoVNet, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
                # trick: eval() affects BatchNorm layers only
if isinstance(m, _BatchNorm):
m.eval()
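

# Illustrative usage sketch (not part of the original file; spec and input
# size are arbitrary): build the backbone and inspect its multi-scale outputs.
def _demo_vovnet():
    net = VoVNet('V-99-eSE', out_features=['stage3', 'stage4', 'stage5'])
    net.eval()
    with torch.no_grad():
        outs = net(torch.randn(1, 3, 224, 224))
    # strides 8/16/32 -> spatial sizes 28/14/7 for a 224x224 input
    return {k: tuple(v.shape) for k, v in outs.items()}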
from mmcv.runner.hooks.hook import HOOKS, Hook
from projects.mmdet3d_plugin.models.utils import run_time
@HOOKS.register_module()
class GradChecker(Hook):
def after_train_iter(self, runner):
for key, val in runner.model.named_parameters():
            if val.grad is None and val.requires_grad:
                print(f"WARNING: {key}'s parameters are not used!")
try:
    from torch.optim import _functional as F
except ImportError:
    print('WARNING: torch.optim._functional is unavailable; torch>=1.8 is recommended.')
import torch
from torch.optim.optimizer import Optimizer
from mmcv.runner.optimizer.builder import OPTIMIZERS
@OPTIMIZERS.register_module()
class AdamW2(Optimizer):
r"""Implements AdamW algorithm. Solve the bug of torch 1.8
The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.
The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_.
Args:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups
lr (float, optional): learning rate (default: 1e-3)
betas (Tuple[float, float], optional): coefficients used for computing
running averages of gradient and its square (default: (0.9, 0.999))
eps (float, optional): term added to the denominator to improve
numerical stability (default: 1e-8)
weight_decay (float, optional): weight decay coefficient (default: 1e-2)
amsgrad (boolean, optional): whether to use the AMSGrad variant of this
algorithm from the paper `On the Convergence of Adam and Beyond`_
(default: False)
.. _Adam\: A Method for Stochastic Optimization:
https://arxiv.org/abs/1412.6980
.. _Decoupled Weight Decay Regularization:
https://arxiv.org/abs/1711.05101
.. _On the Convergence of Adam and Beyond:
https://openreview.net/forum?id=ryQu7f-RZ
"""
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
weight_decay=1e-2, amsgrad=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
if not 0.0 <= weight_decay:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
defaults = dict(lr=lr, betas=betas, eps=eps,
weight_decay=weight_decay, amsgrad=amsgrad)
super(AdamW2, self).__init__(params, defaults)
def __setstate__(self, state):
super(AdamW2, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('amsgrad', False)
@torch.no_grad()
def step(self, closure=None):
"""Performs a single optimization step.
Args:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
for group in self.param_groups:
params_with_grad = []
grads = []
exp_avgs = []
exp_avg_sqs = []
state_sums = []
max_exp_avg_sqs = []
state_steps = []
amsgrad = group['amsgrad']
            # read betas once per group, before the parameter loop,
            # as part of the torch 1.8 AdamW workaround
            beta1, beta2 = group['betas']
for p in group['params']:
if p.grad is None:
continue
params_with_grad.append(p)
if p.grad.is_sparse:
raise RuntimeError('AdamW does not support sparse gradients')
grads.append(p.grad)
state = self.state[p]
# State initialization
if len(state) == 0:
state['step'] = 0
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
if amsgrad:
# Maintains max of all exp. moving avg. of sq. grad. values
state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
exp_avgs.append(state['exp_avg'])
exp_avg_sqs.append(state['exp_avg_sq'])
if amsgrad:
max_exp_avg_sqs.append(state['max_exp_avg_sq'])
# update the steps for each param group update
state['step'] += 1
# record the step after step update
state_steps.append(state['step'])
F.adamw(params_with_grad,
grads,
exp_avgs,
exp_avg_sqs,
max_exp_avg_sqs,
state_steps,
amsgrad,
beta1,
beta2,
group['lr'],
group['weight_decay'],
group['eps'])
return loss
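
# Usage sketch (illustrative): AdamW2 is a drop-in replacement for AdamW and
# can be constructed directly or selected via the mmcv optimizer registry:
#   opt = AdamW2(model.parameters(), lr=2e-4, weight_decay=0.01)
# train.py below swaps 'AdamW' for 'AdamW2' automatically on torch 1.8.1.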
from .bricks import run_time
from .grid_mask import GridMask
from .position_embedding import RelPositionEmbedding
from .visual import save_tensor
from .positional_encoding import LearnedPositionalEncoding3D
import functools
import time
from collections import defaultdict
import torch
time_maps = defaultdict(lambda: 0.)
count_maps = defaultdict(lambda: 0.)
def run_time(name):
def middle(fn):
def wrapper(*args, **kwargs):
torch.cuda.synchronize()
start = time.time()
res = fn(*args, **kwargs)
torch.cuda.synchronize()
            key = '%s : %s' % (name, fn.__name__)
            time_maps[key] += time.time() - start
            count_maps[key] += 1
            print('%s takes %f s on average' % (key, time_maps[key] / count_maps[key]))
return res
return wrapper
return middle
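
# Usage sketch (illustrative): decorate a function to print its running
# average wall-clock time per call; the argument tags the owning module.
#   @run_time('backbone')
#   def extract_feat(self, img):
#       ...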
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from mmcv.runner import force_fp32, auto_fp16
class Grid(object):
def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode=mode
self.st_prob = prob
self.prob = prob
def set_prob(self, epoch, max_epoch):
self.prob = self.st_prob * epoch / max_epoch
def __call__(self, img, label):
if np.random.rand() > self.prob:
return img, label
h = img.size(1)
w = img.size(2)
self.d1 = 2
self.d2 = min(h, w)
hh = int(1.5*h)
ww = int(1.5*w)
d = np.random.randint(self.d1, self.d2)
if self.ratio == 1:
self.l = np.random.randint(1, d)
else:
self.l = min(max(int(d*self.ratio+0.5),1),d-1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh//d):
s = d*i + st_h
t = min(s+self.l, hh)
mask[s:t,:] *= 0
if self.use_w:
for i in range(ww//d):
s = d*i + st_w
t = min(s+self.l, ww)
mask[:,s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
mask = torch.from_numpy(mask).float()
if self.mode == 1:
mask = 1-mask
mask = mask.expand_as(img)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()
offset = (1 - mask) * offset
img = img * mask + offset
else:
img = img * mask
return img, label
class GridMask(nn.Module):
def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
super(GridMask, self).__init__()
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.st_prob = prob
self.prob = prob
        self.fp16_enabled = False  # attribute name checked by mmcv's auto_fp16
def set_prob(self, epoch, max_epoch):
        self.prob = self.st_prob * epoch / max_epoch
@auto_fp16()
def forward(self, x):
if np.random.rand() > self.prob or not self.training:
return x
n,c,h,w = x.size()
x = x.view(-1,h,w)
hh = int(1.5*h)
ww = int(1.5*w)
d = np.random.randint(2, h)
self.l = min(max(int(d*self.ratio+0.5),1),d-1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh//d):
s = d*i + st_h
t = min(s+self.l, hh)
mask[s:t,:] *= 0
if self.use_w:
for i in range(ww//d):
s = d*i + st_w
t = min(s+self.l, ww)
mask[:,s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
mask = torch.from_numpy(mask).to(x.dtype).cuda()
if self.mode == 1:
mask = 1-mask
mask = mask.expand_as(x)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda()
x = x * mask + offset * (1 - mask)
else:
x = x * mask
return x.view(n,c,h,w)
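

# Illustrative sketch (not part of the original file; requires a CUDA device,
# since forward() moves the mask to the GPU): GridMask as a train-time
# augmentation on a batch of image tensors.
def _demo_grid_mask():
    gm = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
    gm.train()  # the mask is only applied in training mode
    x = torch.randn(2, 3, 64, 64).cuda()
    return gm(x).shape  # torch.Size([2, 3, 64, 64]), shape is preserved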
import torch
import torch.nn as nn
import math
class RelPositionEmbedding(nn.Module):
def __init__(self, num_pos_feats=64, pos_norm=True):
super().__init__()
self.num_pos_feats = num_pos_feats
self.fc = nn.Linear(4, self.num_pos_feats,bias=False)
#nn.init.orthogonal_(self.fc.weight)
#self.fc.weight.requires_grad = False
self.pos_norm = pos_norm
if self.pos_norm:
self.norm = nn.LayerNorm(self.num_pos_feats)
def forward(self, tensor):
#mask = nesttensor.mask
B,C,H,W = tensor.shape
#print('tensor.shape', tensor.shape)
y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
#y_axis = torch.stack((y_range, 1-y_range),dim=1)
y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
#x_axis =torch.stack((x_range,1-x_range),dim=1)
x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
x_pos = torch.cat((y_axis, x_axis), dim=1)
x_pos = self.fc(x_pos)
if self.pos_norm:
x_pos = self.norm(x_pos)
#print('xpos,', x_pos.max(),x_pos.min())
return x_pos
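
# Shape sketch (illustrative): for an input of shape (B, C, H, W) the module
# returns one (H*W, num_pos_feats) table of positional features, shared
# across the batch:
#   pe = RelPositionEmbedding(num_pos_feats=64)
#   pos = pe(torch.randn(2, 256, 16, 16))  # pos.shape == (256, 64)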
import math
import torch
import torch.nn as nn
from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING
from mmcv.runner import BaseModule
@POSITIONAL_ENCODING.register_module()
class LearnedPositionalEncoding3D(BaseModule):
"""Position embedding with learnable embedding weights.
Args:
num_feats (int): The feature dimension for each position
along x-axis or y-axis. The final returned dimension for
each position is 2 times of this value.
row_num_embed (int, optional): The dictionary size of row embeddings.
Default 50.
        col_num_embed (int, optional): The dictionary size of col embeddings.
            Default 50.
        height_num_embed (int, optional): The dictionary size of height
            embeddings. Default 50.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
num_feats,
row_num_embed=50,
col_num_embed=50,
height_num_embed=50,
init_cfg=dict(type='Uniform', layer='Embedding')):
super(LearnedPositionalEncoding3D, self).__init__(init_cfg)
self.row_embed = nn.Embedding(row_num_embed, num_feats)
self.col_embed = nn.Embedding(col_num_embed, num_feats)
self.height_embed = nn.Embedding(height_num_embed, num_feats)
self.num_feats = num_feats
self.row_num_embed = row_num_embed
self.col_num_embed = col_num_embed
self.height_num_embed = height_num_embed
def forward(self, mask):
"""Forward function for `LearnedPositionalEncoding`.
Args:
mask (Tensor): ByteTensor mask. Non-zero values representing
ignored positions, while zero values means valid positions
for this image. Shape [bs, h, w].
Returns:
pos (Tensor): Returned position embedding with shape
[bs, num_feats*2, h, w].
"""
l, h, w = mask.shape[-3:]
x = torch.arange(w, device=mask.device)
y = torch.arange(h, device=mask.device)
z = torch.arange(l, device=mask.device)
x_embed = self.col_embed(x)
y_embed = self.row_embed(y)
z_embed = self.height_embed(z)
        pos = torch.cat(
            (x_embed.unsqueeze(0).unsqueeze(0).repeat(l, h, 1, 1),
             y_embed.unsqueeze(1).unsqueeze(0).repeat(l, 1, w, 1),
             z_embed.unsqueeze(1).unsqueeze(1).repeat(1, h, w, 1)),
            dim=-1).permute(3, 0, 1, 2).unsqueeze(0).repeat(
                mask.shape[0], 1, 1, 1, 1)
return pos
def __repr__(self):
"""str: a string that describes the module"""
repr_str = self.__class__.__name__
repr_str += f'(num_feats={self.num_feats}, '
repr_str += f'row_num_embed={self.row_num_embed}, '
        repr_str += f'col_num_embed={self.col_num_embed}, '
        repr_str += f'height_num_embed={self.height_num_embed})'
return repr_str
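
# Usage sketch (illustrative): a 3D mask of shape (bs, l, h, w) yields an
# embedding of shape (bs, num_feats * 3, l, h, w):
#   enc = LearnedPositionalEncoding3D(num_feats=64, row_num_embed=50,
#                                     col_num_embed=50, height_num_embed=8)
#   pos = enc(torch.zeros(2, 8, 25, 25))  # pos.shape == (2, 192, 8, 25, 25)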
import torch
from torchvision.utils import make_grid
import torchvision
import matplotlib.pyplot as plt
import cv2
def convert_color(img_path):
plt.figure()
img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))
plt.close()
def save_tensor(tensor, path, pad_value=254.0):
    print('save_tensor', path)
    tensor = tensor.detach().cpu()
    if tensor.dtype == torch.bool:
        # scale boolean masks to 0/255 before the float conversion below;
        # converting to float first would make this branch unreachable
        tensor = tensor * 255
    tensor = tensor.to(torch.float)
    if len(tensor.shape) == 3:
        tensor = tensor.unsqueeze(1)
    tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
    torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)
    convert_color(path)
convert_color(path)
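
# Usage sketch (illustrative; the output directory must already exist):
#   save_tensor(torch.rand(6, 1, 50, 50), 'vis/grid.png')
# dumps the batch as a single color-mapped image grid.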
# ---------------------------------------------
# Copyright (c) OpenMMLab. All rights reserved.
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from __future__ import division
import argparse
import copy
import mmcv
import os
import time
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from os import path as osp
from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
#from mmdet3d.apis import train_model
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_model
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmseg import __version__ as mmseg_version
from mmcv.utils import TORCH_VERSION, digit_version
def parse_args():
parser = argparse.ArgumentParser(description='Train a detector')
parser.add_argument('config', help='train config file path')
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')
parser.add_argument(
'--no-validate',
action='store_true',
help='whether not to evaluate the checkpoint during training')
group_gpus = parser.add_mutually_exclusive_group()
group_gpus.add_argument(
'--gpus',
type=int,
help='number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='ids of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument(
'--deterministic',
action='store_true',
help='whether to set deterministic options for CUDNN backend.')
parser.add_argument(
'--options',
nargs='+',
action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecated), '
        'change to --cfg-options instead.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
default='none',
help='job launcher')
parser.add_argument('--local_rank', type=int, default=0)
parser.add_argument(
'--autoscale-lr',
action='store_true',
help='automatically scale lr with the number of gpus')
args = parser.parse_args()
if 'LOCAL_RANK' not in os.environ:
os.environ['LOCAL_RANK'] = str(args.local_rank)
if args.options and args.cfg_options:
raise ValueError(
'--options and --cfg-options cannot be both specified, '
'--options is deprecated in favor of --cfg-options')
if args.options:
warnings.warn('--options is deprecated in favor of --cfg-options')
args.cfg_options = args.options
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get('custom_imports', None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg['custom_imports'])
    # import modules from plugin/xx, registry will be updated
if hasattr(cfg, 'plugin'):
if cfg.plugin:
import importlib
if hasattr(cfg, 'plugin_dir'):
plugin_dir = cfg.plugin_dir
_module_dir = os.path.dirname(plugin_dir)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
else:
# import dir is the dirpath for the config file
_module_dir = os.path.dirname(args.config)
_module_dir = _module_dir.split('/')
_module_path = _module_dir[0]
for m in _module_dir[1:]:
_module_path = _module_path + '.' + m
print(_module_path)
plg_lib = importlib.import_module(_module_path)
from projects.mmdet3d_plugin.bevformer.apis.train import custom_train_model
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
# update configs according to CLI args if args.work_dir is not None
cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
# use config filename as default work_dir if cfg.work_dir is None
cfg.work_dir = osp.join('./work_dirs',
osp.splitext(osp.basename(args.config))[0])
# if args.resume_from is not None:
if args.resume_from is not None and osp.isfile(args.resume_from):
cfg.resume_from = args.resume_from
if args.gpu_ids is not None:
cfg.gpu_ids = args.gpu_ids
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if digit_version(TORCH_VERSION) == digit_version('1.8.1') and cfg.optimizer['type'] == 'AdamW':
        cfg.optimizer['type'] = 'AdamW2'  # work around the AdamW bug in torch 1.8.1
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
distributed = False
else:
distributed = True
init_dist(args.launcher, **cfg.dist_params)
# re-set gpu_ids with distributed training mode
_, world_size = get_dist_info()
cfg.gpu_ids = range(world_size)
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
# init the logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
# specify logger name, if we still use 'mmdet', the output info will be
# filtered and won't be saved in the log_file
# TODO: ugly workaround to judge whether we are training det or seg model
if cfg.model.type in ['EncoderDecoder3D']:
logger_name = 'mmseg'
else:
logger_name = 'mmdet'
logger = get_root_logger(
log_file=log_file, log_level=cfg.log_level, name=logger_name)
# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
dash_line)
meta['env_info'] = env_info
meta['config'] = cfg.pretty_text
# log some basic info
logger.info(f'Distributed training: {distributed}')
logger.info(f'Config:\n{cfg.pretty_text}')
# set random seeds
if args.seed is not None:
logger.info(f'Set random seed to {args.seed}, '
f'deterministic: {args.deterministic}')
set_random_seed(args.seed, deterministic=args.deterministic)
cfg.seed = args.seed
meta['seed'] = args.seed
meta['exp_name'] = osp.basename(args.config)
model = build_model(
cfg.model,
train_cfg=cfg.get('train_cfg'),
test_cfg=cfg.get('test_cfg'))
model.init_weights()
logger.info(f'Model:\n{model}')
datasets = [build_dataset(cfg.data.train)]
if len(cfg.workflow) == 2:
val_dataset = copy.deepcopy(cfg.data.val)
# in case we use a dataset wrapper
if 'dataset' in cfg.data.train:
val_dataset.pipeline = cfg.data.train.dataset.pipeline
else:
val_dataset.pipeline = cfg.data.train.pipeline
# set test_mode=False here in deep copied config
# which do not affect AP/AR calculation later
# refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa
val_dataset.test_mode = False
datasets.append(build_dataset(val_dataset))
if cfg.checkpoint_config is not None:
# save mmdet version, config file content and class names in
# checkpoints as meta data
cfg.checkpoint_config.meta = dict(
mmdet_version=mmdet_version,
mmseg_version=mmseg_version,
mmdet3d_version=mmdet3d_version,
config=cfg.pretty_text,
CLASSES=datasets[0].CLASSES,
PALETTE=datasets[0].PALETTE # for segmentors
if hasattr(datasets[0], 'PALETTE') else None)
# add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES
custom_train_model(
model,
datasets,
cfg,
distributed=distributed,
validate=(not args.no_validate),
timestamp=timestamp,
meta=meta)
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
import numpy as np
import seaborn as sns
from collections import defaultdict
from matplotlib import pyplot as plt
def cal_train_time(log_dicts, args):
for i, log_dict in enumerate(log_dicts):
print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
all_times = []
for epoch in log_dict.keys():
if args.include_outliers:
all_times.append(log_dict[epoch]['time'])
else:
all_times.append(log_dict[epoch]['time'][1:])
all_times = np.array(all_times)
epoch_ave_time = all_times.mean(-1)
slowest_epoch = epoch_ave_time.argmax()
fastest_epoch = epoch_ave_time.argmin()
std_over_epoch = epoch_ave_time.std()
print(f'slowest epoch {slowest_epoch + 1}, '
f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
print(f'fastest epoch {fastest_epoch + 1}, '
f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
print(f'time std over epochs is {std_over_epoch:.4f}')
print(f'average iter time: {np.mean(all_times):.4f} s/iter')
print()
def plot_curve(log_dicts, args):
if args.backend is not None:
plt.switch_backend(args.backend)
sns.set_style(args.style)
# if legend is None, use {filename}_{key} as legend
legend = args.legend
if legend is None:
legend = []
for json_log in args.json_logs:
for metric in args.keys:
legend.append(f'{json_log}_{metric}')
assert len(legend) == (len(args.json_logs) * len(args.keys))
metrics = args.keys
num_metrics = len(metrics)
for i, log_dict in enumerate(log_dicts):
epochs = list(log_dict.keys())
for j, metric in enumerate(metrics):
print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
if metric not in log_dict[epochs[args.interval - 1]]:
raise KeyError(
f'{args.json_logs[i]} does not contain metric {metric}')
if args.mode == 'eval':
if min(epochs) == args.interval:
x0 = args.interval
else:
# if current training is resumed from previous checkpoint
# we lost information in early epochs
# `xs` should start according to `min(epochs)`
if min(epochs) % args.interval == 0:
x0 = min(epochs)
else:
                        # find the first epoch that runs eval
x0 = min(epochs) + args.interval - \
min(epochs) % args.interval
xs = np.arange(x0, max(epochs) + 1, args.interval)
ys = []
for epoch in epochs[args.interval - 1::args.interval]:
ys += log_dict[epoch][metric]
# if training is aborted before eval of the last epoch
# `xs` and `ys` will have different length and cause an error
# check if `ys[-1]` is empty here
if not log_dict[epoch][metric]:
xs = xs[:-1]
ax = plt.gca()
ax.set_xticks(xs)
plt.xlabel('epoch')
plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
else:
xs = []
ys = []
num_iters_per_epoch = \
log_dict[epochs[args.interval-1]]['iter'][-1]
for epoch in epochs[args.interval - 1::args.interval]:
iters = log_dict[epoch]['iter']
if log_dict[epoch]['mode'][-1] == 'val':
iters = iters[:-1]
xs.append(
np.array(iters) + (epoch - 1) * num_iters_per_epoch)
ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
xs = np.concatenate(xs)
ys = np.concatenate(ys)
plt.xlabel('iter')
plt.plot(
xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
plt.legend()
if args.title is not None:
plt.title(args.title)
if args.out is None:
plt.show()
else:
print(f'save curve to: {args.out}')
plt.savefig(args.out)
plt.cla()
def add_plot_parser(subparsers):
parser_plt = subparsers.add_parser(
'plot_curve', help='parser for plotting curves')
parser_plt.add_argument(
'json_logs',
type=str,
nargs='+',
help='path of train log in json format')
parser_plt.add_argument(
'--keys',
type=str,
nargs='+',
default=['mAP_0.25'],
help='the metric that you want to plot')
parser_plt.add_argument('--title', type=str, help='title of figure')
parser_plt.add_argument(
'--legend',
type=str,
nargs='+',
default=None,
help='legend of each plot')
parser_plt.add_argument(
'--backend', type=str, default=None, help='backend of plt')
parser_plt.add_argument(
'--style', type=str, default='dark', help='style of plt')
parser_plt.add_argument('--out', type=str, default=None)
parser_plt.add_argument('--mode', type=str, default='train')
parser_plt.add_argument('--interval', type=int, default=1)
def add_time_parser(subparsers):
parser_time = subparsers.add_parser(
'cal_train_time',
help='parser for computing the average time per training iteration')
parser_time.add_argument(
'json_logs',
type=str,
nargs='+',
help='path of train log in json format')
parser_time.add_argument(
'--include-outliers',
action='store_true',
help='include the first value of every epoch when computing '
'the average time')
def parse_args():
parser = argparse.ArgumentParser(description='Analyze Json Log')
# currently only support plot curve and calculate average train time
subparsers = parser.add_subparsers(dest='task', help='task parser')
add_plot_parser(subparsers)
add_time_parser(subparsers)
args = parser.parse_args()
return args
def load_json_logs(json_logs):
# load and convert json_logs to log_dict, key is epoch, value is a sub dict
# keys of sub dict is different metrics, e.g. memory, bbox_mAP
# value of sub dict is a list of corresponding values of all iterations
log_dicts = [dict() for _ in json_logs]
for json_log, log_dict in zip(json_logs, log_dicts):
with open(json_log, 'r') as log_file:
for line in log_file:
log = json.loads(line.strip())
# skip lines without `epoch` field
if 'epoch' not in log:
continue
epoch = log.pop('epoch')
if epoch not in log_dict:
log_dict[epoch] = defaultdict(list)
for k, v in log.items():
log_dict[epoch][k].append(v)
return log_dicts
def main():
args = parse_args()
json_logs = args.json_logs
for json_log in json_logs:
assert json_log.endswith('.json')
log_dicts = load_json_logs(json_logs)
eval(args.task)(log_dicts, args)
if __name__ == '__main__':
main()
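
# Example invocations (paths are placeholders):
#   python analyze_logs.py plot_curve work_dirs/run/log.json --keys loss --out loss.png
#   python analyze_logs.py cal_train_time work_dirs/run/log.json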
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
import sys
sys.path.append('.')
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.datasets import custom_build_dataset
# from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
#from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
parser = argparse.ArgumentParser(description='MMDet benchmark a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('--checkpoint', default=None, help='checkpoint file')
    parser.add_argument('--samples', default=2000, type=int,
                        help='samples to benchmark')
    parser.add_argument(
        '--log-interval', default=50, type=int, help='interval of logging')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn, this will slightly increase'
'the inference speed')
args = parser.parse_args()
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
cfg.data.test.test_mode = True
# build the dataloader
# TODO: support multiple images per gpu (only minor changes are needed)
print(cfg.data.test)
dataset = custom_build_dataset(cfg.data.test)
data_loader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=False,
shuffle=False)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
#if args.fuse_conv_bn:
# model = fuse_module(model)
model = MMDataParallel(model, device_ids=[0])
model.eval()
# the first several iterations may be very slow so skip them
num_warmup = 5
pure_inf_time = 0
# benchmark with several samples and take the average
for i, data in enumerate(data_loader):
torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
model(return_loss=False, rescale=True, **data)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start_time
if i >= num_warmup:
pure_inf_time += elapsed
if (i + 1) % args.log_interval == 0:
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Done image [{i + 1:<3}/ {args.samples}], '
f'fps: {fps:.1f} img / s')
if (i + 1) == args.samples:
pure_inf_time += elapsed
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Overall fps: {fps:.1f} img / s')
break
if __name__ == '__main__':
main()
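
# Example invocation (placeholder paths): measure pure inference FPS.
#   python benchmark.py projects/configs/bevformer/bevformer_base.py \
#       --checkpoint ckpts/bevformer_base.pth --samples 500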
import torch
file_path = './ckpts/bevformer_v4.pth'
model = torch.load(file_path, map_location='cpu')
all = 0
for key in list(model['state_dict'].keys()):
all += model['state_dict'][key].nelement()
print(all)
# smaller 63374123
# v4 69140395
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from matplotlib import rcParams
from matplotlib.axes import Axes
from pyquaternion import Quaternion
from tqdm import tqdm
from typing import Tuple, List, Iterable
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.eval.detection.render import visualize_sample

cams = ['CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_RIGHT',
        'CAM_BACK',
        'CAM_BACK_LEFT',
        'CAM_FRONT_LEFT']
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
# if selected_anntokens is not None:
# boxes = list(map(nusc.get_box, selected_anntokens))
# else:
# boxes = nusc.get_boxes(sample_data_token)
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def lidar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
        except Exception:
            # skip annotations that cannot be converted to a DetectionBox
            pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
    print('green is ground truth')
    print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path+'_bev')
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack', 'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
#print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
def render_sample_data(
        sample_token: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
:param sample_data_token: Sample_data token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
"""
    lidar_render(sample_token, pred_data, out_path=out_path)
    sample = nusc.get('sample', sample_token)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False
elif sensor_modality == 'camera':
# Load boxes and image.
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
                         pred_data['results'][sample_token] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError("Error: Unknown sensor modality!")
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT:{} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path+'_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
    for idx in range(0, 10):
        render_sample_data(sample_token_list[idx], pred_data=bevformer_results, out_path=sample_token_list[idx])