Commit 41b18fd8 authored by zhe chen

Use pre-commit to reformat code


parent ff20ea39
@@ -2,6 +1,7 @@ import math
import torch
from torch.utils.data import DistributedSampler as _DistributedSampler
from .sampler import SAMPLER
@@ -33,9 +34,9 @@ class DistributedSampler(_DistributedSampler):
assert len(indices) == self.total_size
# subsample
per_replicas = self.total_size // self.num_replicas
# indices = indices[self.rank:self.total_size:self.num_replicas]
indices = indices[self.rank * per_replicas:(self.rank + 1) * per_replicas]
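# each rank takes a contiguous block of per_replicas indices (the commented-out line above is the strided variant)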
assert len(indices) == self.num_samples
return iter(indices)
# Copyright (c) OpenMMLab. All rights reserved.
import math
@@ -6,9 +5,8 @@ import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import Sampler
from .sampler import SAMPLER
import random
from IPython import embed
@SAMPLER.register_module()
@@ -107,4 +105,3 @@ class DistributedGroupSampler(Sampler):
def set_epoch(self, epoch):
self.epoch = epoch
from mmcv.utils.registry import Registry, build_from_cfg
SAMPLER = Registry('sampler')
def build_sampler(cfg, default_args):
return build_from_cfg(cfg, SAMPLER, default_args)
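A minimal usage sketch of the registry above (hypothetical config values; it assumes the DistributedSampler shown earlier is registered under SAMPLER and that a dataset object and rank/world-size variables exist):
# Hypothetical example: build a registered sampler from a config dict.
sampler_cfg = dict(type='DistributedSampler', shuffle=True)
sampler = build_sampler(sampler_cfg, default_args=dict(dataset=train_dataset, num_replicas=world_size, rank=rank))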
from .vovnet import VoVNet
__all__ = ['VoVNet']
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import BaseModule
from mmdet.models.builder import BACKBONES
from torch.nn.modules.batchnorm import _BatchNorm
VoVNet19_slim_dw_eSE = {
'stem': [64, 64, 64],
'stage_conv_ch': [64, 80, 96, 112],
'stage_out_ch': [112, 256, 384, 512],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': True
}
VoVNet19_dw_eSE = {
'stem': [64, 64, 64],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": True
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': True
}
VoVNet19_slim_eSE = {
@@ -35,57 +34,57 @@ VoVNet19_slim_eSE = {
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
"dw": False
'dw': False
}
VoVNet19_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 3,
"block_per_stage": [1, 1, 1, 1],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 3,
'block_per_stage': [1, 1, 1, 1],
'eSE': True,
'dw': False
}
VoVNet39_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 2, 2],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 1, 2, 2],
'eSE': True,
'dw': False
}
VoVNet57_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 1, 4, 3],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 1, 4, 3],
'eSE': True,
'dw': False
}
VoVNet99_eSE = {
'stem': [64, 64, 128],
"stage_conv_ch": [128, 160, 192, 224],
"stage_out_ch": [256, 512, 768, 1024],
"layer_per_block": 5,
"block_per_stage": [1, 3, 9, 3],
"eSE": True,
"dw": False
'stage_conv_ch': [128, 160, 192, 224],
'stage_out_ch': [256, 512, 768, 1024],
'layer_per_block': 5,
'block_per_stage': [1, 3, 9, 3],
'eSE': True,
'dw': False
}
_STAGE_SPECS = {
"V-19-slim-dw-eSE": VoVNet19_slim_dw_eSE,
"V-19-dw-eSE": VoVNet19_dw_eSE,
"V-19-slim-eSE": VoVNet19_slim_eSE,
"V-19-eSE": VoVNet19_eSE,
"V-39-eSE": VoVNet39_eSE,
"V-57-eSE": VoVNet57_eSE,
"V-99-eSE": VoVNet99_eSE,
'V-19-slim-dw-eSE': VoVNet19_slim_dw_eSE,
'V-19-dw-eSE': VoVNet19_dw_eSE,
'V-19-slim-eSE': VoVNet19_slim_eSE,
'V-19-eSE': VoVNet19_eSE,
'V-39-eSE': VoVNet39_eSE,
'V-57-eSE': VoVNet57_eSE,
'V-99-eSE': VoVNet99_eSE,
}
@@ -117,7 +116,7 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
"""3x3 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
f'{module_name}_{postfix}/conv',
nn.Conv2d(
in_channels,
out_channels,
@@ -128,8 +127,8 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
(f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
(f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
]
@@ -137,7 +136,7 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
"""1x1 convolution with padding"""
return [
(
f"{module_name}_{postfix}/conv",
f'{module_name}_{postfix}/conv',
nn.Conv2d(
in_channels,
out_channels,
@@ -148,8 +147,8 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
bias=False,
),
),
(f"{module_name}_{postfix}/norm", nn.BatchNorm2d(out_channels)),
(f"{module_name}_{postfix}/relu", nn.ReLU(inplace=True)),
(f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
(f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
]
@@ -179,7 +178,7 @@ class eSEModule(nn.Module):
class _OSA_module(nn.Module):
def __init__(
self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False
):
super(_OSA_module, self).__init__()
@@ -192,7 +191,7 @@ class _OSA_module(nn.Module):
if self.depthwise and in_channel != stage_ch:
self.isReduced = True
self.conv_reduction = nn.Sequential(
OrderedDict(conv1x1(in_channel, stage_ch, "{}_reduction".format(module_name), "0"))
OrderedDict(conv1x1(in_channel, stage_ch, '{}_reduction'.format(module_name), '0'))
)
for i in range(layer_per_block):
if self.depthwise:
@@ -203,7 +202,7 @@ class _OSA_module(nn.Module):
# feature aggregation
in_channel = in_ch + layer_per_block * stage_ch
self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, "concat")))
self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, 'concat')))
self.ese = eSEModule(concat_ch)
@@ -232,24 +231,24 @@ class _OSA_module(nn.Module):
class _OSA_stage(nn.Sequential):
def __init__(
self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False
):
super(_OSA_stage, self).__init__()
if not stage_num == 2:
self.add_module("Pooling", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
self.add_module('Pooling', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))
if block_per_stage != 1:
SE = False
module_name = f"OSA{stage_num}_1"
module_name = f'OSA{stage_num}_1'
self.add_module(
module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)
)
for i in range(block_per_stage - 1):
if i != block_per_stage - 2: # last block
SE = False
module_name = f"OSA{stage_num}_{i + 2}"
module_name = f'OSA{stage_num}_{i + 2}'
self.add_module(
module_name,
_OSA_module(
@@ -267,7 +266,7 @@ class _OSA_stage(nn.Sequential):
@BACKBONES.register_module()
class VoVNet(BaseModule):
def __init__(self, spec_name, input_ch=3, out_features=None,
frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):
"""
Args:
@@ -285,32 +284,32 @@ class VoVNet(BaseModule):
self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
stage_specs = _STAGE_SPECS[spec_name]
stem_ch = stage_specs["stem"]
config_stage_ch = stage_specs["stage_conv_ch"]
config_concat_ch = stage_specs["stage_out_ch"]
block_per_stage = stage_specs["block_per_stage"]
layer_per_block = stage_specs["layer_per_block"]
SE = stage_specs["eSE"]
depthwise = stage_specs["dw"]
stem_ch = stage_specs['stem']
config_stage_ch = stage_specs['stage_conv_ch']
config_concat_ch = stage_specs['stage_out_ch']
block_per_stage = stage_specs['block_per_stage']
layer_per_block = stage_specs['layer_per_block']
SE = stage_specs['eSE']
depthwise = stage_specs['dw']
self._out_features = out_features
# Stem module
conv_type = dw_conv3x3 if depthwise else conv3x3
stem = conv3x3(input_ch, stem_ch[0], "stem", "1", 2)
stem += conv_type(stem_ch[0], stem_ch[1], "stem", "2", 1)
stem += conv_type(stem_ch[1], stem_ch[2], "stem", "3", 2)
self.add_module("stem", nn.Sequential((OrderedDict(stem))))
stem = conv3x3(input_ch, stem_ch[0], 'stem', '1', 2)
stem += conv_type(stem_ch[0], stem_ch[1], 'stem', '2', 1)
stem += conv_type(stem_ch[1], stem_ch[2], 'stem', '3', 2)
self.add_module('stem', nn.Sequential((OrderedDict(stem))))
current_stirde = 4
self._out_feature_strides = {"stem": current_stirde, "stage2": current_stirde}
self._out_feature_channels = {"stem": stem_ch[2]}
self._out_feature_strides = {'stem': current_stirde, 'stage2': current_stirde}
self._out_feature_channels = {'stem': stem_ch[2]}
stem_out_ch = [stem_ch[2]]
in_ch_list = stem_out_ch + config_concat_ch[:-1]
# OSA stages
self.stage_names = []
for i in range(4): # num_stages
name = "stage%d" % (i + 2) # stage 2 ... stage 5
name = 'stage%d' % (i + 2) # stage 2 ... stage 5
self.stage_names.append(name)
self.add_module(
name,
@@ -341,8 +340,8 @@ class VoVNet(BaseModule):
def forward(self, x):
outputs = {}
x = self.stem(x)
if "stem" in self._out_features:
outputs["stem"] = x
if 'stem' in self._out_features:
outputs['stem'] = x
for name in self.stage_names:
x = getattr(self, name)(x)
if name in self._out_features:
@@ -358,7 +357,7 @@ class VoVNet(BaseModule):
param.requires_grad = False
for i in range(1, self.frozen_stages + 1):
m = getattr(self, f'stage{i + 1}')
m.eval()
for param in m.parameters():
param.requires_grad = False
@@ -372,4 +371,4 @@ class VoVNet(BaseModule):
for m in self.modules():
# trick: eval have effect on BatchNorm only
if isinstance(m, _BatchNorm):
m.eval()
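A minimal usage sketch of the backbone defined above (the input size and requested stage names are hypothetical; the exact return container depends on forward() code elided from this diff):
# Hypothetical example: build a VoVNet-39 backbone that exposes the last two stages.
backbone = VoVNet('V-39-eSE', input_ch=3, out_features=['stage4', 'stage5'])
feats = backbone(torch.rand(1, 3, 224, 224))  # outputs for 'stage4' and 'stage5'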
from .hooks import GradChecker
from mmcv.runner.hooks.hook import HOOKS, Hook
from projects.mmdet3d_plugin.models.utils import run_time
@HOOKS.register_module()
@@ -7,7 +6,5 @@ class GradChecker(Hook):
def after_train_iter(self, runner):
for key, val in runner.model.named_parameters():
if val.grad is None and val.requires_grad:
print('WARNING: {key}\'s parameters are not used!!!!'.format(key=key))
from .adamw import AdamW2
@@ -4,8 +4,9 @@ except:
print('WARNING!!!, I recommend using torch>=1.8')
import torch
from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch.optim.optimizer import Optimizer
@OPTIMIZERS.register_module()
class AdamW2(Optimizer):
@@ -38,15 +39,15 @@ class AdamW2(Optimizer):
def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
weight_decay=1e-2, amsgrad=False):
if not 0.0 <= lr:
raise ValueError("Invalid learning rate: {}".format(lr))
raise ValueError('Invalid learning rate: {}'.format(lr))
if not 0.0 <= eps:
raise ValueError("Invalid epsilon value: {}".format(eps))
raise ValueError('Invalid epsilon value: {}'.format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1]))
if not 0.0 <= weight_decay:
raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
raise ValueError('Invalid weight_decay value: {}'.format(weight_decay))
defaults = dict(lr=lr, betas=betas, eps=eps,
weight_decay=weight_decay, amsgrad=amsgrad)
super(AdamW2, self).__init__(params, defaults)
@@ -109,7 +110,6 @@ class AdamW2(Optimizer):
if amsgrad:
max_exp_avg_sqs.append(state['max_exp_avg_sq'])
# update the steps for each param group update
state['step'] += 1
# record the step after step update
@@ -128,4 +128,4 @@ class AdamW2(Optimizer):
group['weight_decay'],
group['eps'])
return loss
from .bricks import run_time
from .grid_mask import GridMask
from .position_embedding import RelPositionEmbedding
from .positional_encoding import LearnedPositionalEncoding3D
from .visual import save_tensor
import functools
import time
from collections import defaultdict
import torch
time_maps = defaultdict(lambda: 0.)
count_maps = defaultdict(lambda: 0.)
def run_time(name):
def middle(fn):
def wrapper(*args, **kwargs):
@@ -11,10 +14,12 @@ def run_time(name):
start = time.time()
res = fn(*args, **kwargs)
torch.cuda.synchronize()
time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start
count_maps['%s : %s' % (name, fn.__name__)] += 1
print('%s : %s takes up %f ' % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps[
'%s : %s' % (name, fn.__name__)]))
return res
return wrapper
return middle
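A short usage sketch of the run_time decorator above (the decorated function and its input are hypothetical; CUDA must be available because the wrapper calls torch.cuda.synchronize()):
# Hypothetical example: print the running-average wall time of a function on every call.
@run_time('backbone')
def extract_feat(img):
    return img.mean()
feat = extract_feat(torch.rand(8, 256, 32, 32).cuda())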
import numpy as np
import torch
import torch.nn as nn
from mmcv.runner import auto_fp16
from PIL import Image
class Grid(object):
def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
self.use_h = use_h
self.use_w = use_w
self.rotate = rotate
self.offset = offset
self.ratio = ratio
self.mode = mode
self.st_prob = prob
self.prob = prob
@@ -25,50 +26,50 @@ class Grid(object):
w = img.size(2)
self.d1 = 2
self.d2 = min(h, w)
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(self.d1, self.d2)
if self.ratio == 1:
self.l = np.random.randint(1, d)
else:
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]
mask = torch.from_numpy(mask).float()
if self.mode == 1:
mask = 1 - mask
mask = mask.expand_as(img)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).float()
offset = (1 - mask) * offset
img = img * mask + offset
else:
img = img * mask
return img, label
class GridMask(nn.Module):
def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
super(GridMask, self).__init__()
self.use_h = use_h
self.use_w = use_w
@@ -79,46 +80,48 @@ class GridMask(nn.Module):
self.st_prob = prob
self.prob = prob
self.fp16_enable = False
def set_prob(self, epoch, max_epoch):
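# ramp the probability of applying the mask linearly from 0 to st_prob over training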
self.prob = self.st_prob * epoch / max_epoch # + 1.#0.5
@auto_fp16()
def forward(self, x):
if np.random.rand() > self.prob or not self.training:
return x
n, c, h, w = x.size()
x = x.view(-1, h, w)
hh = int(1.5 * h)
ww = int(1.5 * w)
d = np.random.randint(2, h)
self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
mask = np.ones((hh, ww), np.float32)
st_h = np.random.randint(d)
st_w = np.random.randint(d)
if self.use_h:
for i in range(hh // d):
s = d * i + st_h
t = min(s + self.l, hh)
mask[s:t, :] *= 0
if self.use_w:
for i in range(ww // d):
s = d * i + st_w
t = min(s + self.l, ww)
mask[:, s:t] *= 0
r = np.random.randint(self.rotate)
mask = Image.fromarray(np.uint8(mask))
mask = mask.rotate(r)
mask = np.asarray(mask)
mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]
mask = torch.from_numpy(mask).to(x.dtype).cuda()
if self.mode == 1:
mask = 1 - mask
mask = mask.expand_as(x)
if self.offset:
offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).to(x.dtype).cuda()
x = x * mask + offset * (1 - mask)
else:
x = x * mask
return x.view(n, c, h, w)
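A minimal usage sketch of GridMask (the tensor shape and hyper-parameters are hypothetical; the input is assumed to be a CUDA tensor because the mask is moved to the GPU):
# Hypothetical example: apply GridMask augmentation to a batch of images during training.
grid_mask = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
images = torch.rand(2, 3, 224, 224).cuda()
augmented = grid_mask(images)  # same shape as the input, with a grid of cells masked out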
import math
import torch
import torch.nn as nn
class RelPositionEmbedding(nn.Module):
def __init__(self, num_pos_feats=64, pos_norm=True):
super().__init__()
self.num_pos_feats = num_pos_feats
self.fc = nn.Linear(4, self.num_pos_feats, bias=False)
# nn.init.orthogonal_(self.fc.weight)
# self.fc.weight.requires_grad = False
self.pos_norm = pos_norm
if self.pos_norm:
self.norm = nn.LayerNorm(self.num_pos_feats)
def forward(self, tensor):
# mask = nesttensor.mask
B, C, H, W = tensor.shape
# print('tensor.shape', tensor.shape)
y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
# y_axis = torch.stack((y_range, 1-y_range),dim=1)
y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
# x_axis =torch.stack((x_range,1-x_range),dim=1)
x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
x_pos = torch.cat((y_axis, x_axis), dim=1)
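# x_pos: (H * W, 4) sine/cosine encodings of the normalized row and column positions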
@@ -30,5 +33,5 @@ class RelPositionEmbedding(nn.Module):
if self.pos_norm:
x_pos = self.norm(x_pos)
# print('xpos,', x_pos.max(),x_pos.min())
return x_pos
import torch
import torch.nn as nn
from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING
from mmcv.runner import BaseModule
@POSITIONAL_ENCODING.register_module()
class LearnedPositionalEncoding3D(BaseModule):
"""Position embedding with learnable embedding weights.
Args:
num_feats (int): The feature dimension for each position
along the x-axis, y-axis, or height-axis. The final returned dimension for
each position is 3 times this value.
row_num_embed (int, optional): The dictionary size of row embeddings.
Default 50.
col_num_embed (int, optional): The dictionary size of col embeddings.
Default 50.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
def __init__(self,
num_feats,
row_num_embed=50,
col_num_embed=50,
height_num_embed=50,
init_cfg=dict(type='Uniform', layer='Embedding')):
super(LearnedPositionalEncoding3D, self).__init__(init_cfg)
self.row_embed = nn.Embedding(row_num_embed, num_feats)
self.col_embed = nn.Embedding(col_num_embed, num_feats)
self.height_embed = nn.Embedding(height_num_embed, num_feats)
self.num_feats = num_feats
self.row_num_embed = row_num_embed
self.col_num_embed = col_num_embed
self.height_num_embed = height_num_embed
def forward(self, mask):
"""Forward function for `LearnedPositionalEncoding`.
Args:
mask (Tensor): ByteTensor mask. Non-zero values representing
ignored positions, while zero values means valid positions
for this image. Shape [bs, l, h, w].
Returns:
pos (Tensor): Returned position embedding with shape
[bs, num_feats*3, l, h, w].
"""
l, h, w = mask.shape[-3:]
x = torch.arange(w, device=mask.device)
y = torch.arange(h, device=mask.device)
z = torch.arange(l, device=mask.device)
x_embed = self.col_embed(x)
y_embed = self.row_embed(y)
z_embed = self.height_embed(z)
pos = torch.cat(
(x_embed.unsqueeze(0).unsqueeze(0).repeat(l, h, 1, 1),
y_embed.unsqueeze(1).unsqueeze(0).repeat(l, 1, w, 1),
z_embed.unsqueeze(1).unsqueeze(1).repeat(1, h, w, 1)), dim=-1).permute(3, 0, 1, 2).unsqueeze(0).repeat(
mask.shape[0], 1, 1, 1, 1)
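# pos: (bs, 3 * num_feats, l, h, w)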
return pos
def __repr__(self):
"""str: a string that describes the module"""
repr_str = self.__class__.__name__
repr_str += f'(num_feats={self.num_feats}, '
repr_str += f'row_num_embed={self.row_num_embed}, '
repr_str += f'col_num_embed={self.col_num_embed}, '
repr_str += f'height_num_embed={self.height_num_embed})'
return repr_str
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision.utils import make_grid
def convert_color(img_path):
@@ -12,11 +12,11 @@ def convert_color(img_path):
plt.close()
def save_tensor(tensor, path, pad_value=254.0, ):
print('save_tensor', path)
tensor = tensor.to(torch.float).detach().cpu()
if tensor.type() == 'torch.BoolTensor':
tensor = tensor * 255
if len(tensor.shape) == 3:
tensor = tensor.unsqueeze(1)
tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
@@ -3,31 +3,30 @@
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from __future__ import division
import argparse
import copy
import mmcv
import os
import time
import torch
import warnings
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from os import path as osp
from mmcv.utils import TORCH_VERSION, digit_version
from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_model
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmseg import __version__ as mmseg_version
# from mmdet3d.apis import train_model
def parse_args():
@@ -45,13 +44,13 @@ def parse_args():
'--gpus',
type=int,
help='number of gpus to use '
'(only applicable to non-distributed training)')
group_gpus.add_argument(
'--gpu-ids',
type=int,
nargs='+',
help='ids of gpus to use '
'(only applicable to non-distributed training)')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument(
'--deterministic',
@@ -62,18 +61,18 @@ def parse_args():
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file (deprecate), '
'change to --cfg-options instead.')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
parser.add_argument(
'--launcher',
choices=['none', 'pytorch', 'slurm', 'mpi'],
@@ -134,7 +133,8 @@ def main():
print(_module_path)
plg_lib = importlib.import_module(_module_path)
from projects.mmdet3d_plugin.bevformer.apis.train import \
custom_train_model
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
@@ -155,7 +155,7 @@ def main():
else:
cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
if digit_version(TORCH_VERSION) == digit_version('1.8.1') and cfg.optimizer['type'] == 'AdamW':
cfg.optimizer['type'] = 'AdamW2' # fix bug in Adamw
if args.autoscale_lr:
# apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
from collections import defaultdict
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
@@ -65,7 +66,7 @@ def plot_curve(log_dicts, args):
else:
# find the first epoch that do eval
x0 = min(epochs) + args.interval - \
min(epochs) % args.interval
xs = np.arange(x0, max(epochs) + 1, args.interval)
ys = []
for epoch in epochs[args.interval - 1::args.interval]:
@@ -85,7 +86,7 @@ def plot_curve(log_dicts, args):
xs = []
ys = []
num_iters_per_epoch = \
log_dict[epochs[args.interval - 1]]['iter'][-1]
for epoch in epochs[args.interval - 1::args.interval]:
iters = log_dict[epoch]['iter']
if log_dict[epoch]['mode'][-1] == 'val':
@@ -152,7 +153,7 @@ def add_time_parser(subparsers):
'--include-outliers',
action='store_true',
help='include the first value of every epoch when computing '
'the average time')
def parse_args():
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import sys
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
sys.path.append('.')
from projects.mmdet3d_plugin.datasets.builder import build_dataloader
from projects.mmdet3d_plugin.datasets import custom_build_dataset
# from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
# from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
@@ -25,7 +28,7 @@ def parse_args():
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn, this will slightly increase'
'the inference speed')
args = parser.parse_args()
return args
@@ -59,7 +62,7 @@ def main():
wrap_fp16_model(model)
if args.checkpoint is not None:
load_checkpoint(model, args.checkpoint, map_location='cpu')
# if args.fuse_conv_bn:
# model = fuse_module(model)
model = MMDataParallel(model, device_ids=[0])
import torch
file_path = './ckpts/bevformer_v4.pth'
model = torch.load(file_path, map_location='cpu')
all = 0
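# sum the number of elements of every tensor in the checkpoint's state_dict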
for key in list(model['state_dict'].keys()):
all += model['state_dict'][key].nelement()
print(all)
# smaller 63374123
# v4 69140395
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from matplotlib.axes import Axes
from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.render import visualize_sample
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from PIL import Image
from pyquaternion import Quaternion
from tqdm import tqdm
from typing import Tuple, List, Iterable
cams = ['CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_FRONT_LEFT']
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
# if selected_anntokens is not None:
# boxes = list(map(nusc.get_box, selected_anntokens))
# else:
# boxes = nusc.get_boxes(sample_data_token)
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
def lidiar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
except:
pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
print('green is ground truth')
print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path+'_bev')
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack', 'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
#print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
def render_sample_data(
sample_toekn: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
:param sample_data_token: Sample_data token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
"""
lidiar_render(sample_toekn, pred_data, out_path=out_path)
sample = nusc.get('sample', sample_toekn)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False
elif sensor_modality == 'camera':
# Load boxes and image.
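# keep only predictions with detection_score above 0.2, then transform them into this camera's frame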
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
pred_data['results'][sample_toekn] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError("Error: Unknown sensor modality!")
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT:{} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path+'_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
for id in range(0, 10):
render_sample_data(sample_token_list[id], pred_data=bevformer_results, out_path=sample_token_list[id])
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv
from nuscenes.eval.common.data_classes import EvalBoxes
from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.eval.detection.render import visualize_sample
from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import (BoxVisibility, box_in_image,
view_points)
from PIL import Image
from pyquaternion import Quaternion
cams = ['CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_RIGHT',
'CAM_BACK',
'CAM_BACK_LEFT',
'CAM_FRONT_LEFT']
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from nuscenes.utils.data_classes import Box, LidarPointCloud
from PIL import Image
def render_annotation(
anntoken: str,
margin: float = 10,
view: np.ndarray = np.eye(4),
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png',
extra_info: bool = False) -> None:
"""
Render selected annotation.
:param anntoken: Sample_annotation token.
:param margin: How many meters in each direction to include in LIDAR view.
:param view: LIDAR view point.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
"""
ann_record = nusc.get('sample_annotation', anntoken)
sample_record = nusc.get('sample', ann_record['sample_token'])
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], []
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = []
select_cams = []
for cam in cams:
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
selected_anntokens=[anntoken])
if len(boxes) > 0:
all_bboxes.append(boxes)
select_cams.append(cam)
# We found an image that matches. Let's abort.
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
# 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
num_cam = len(all_bboxes)
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams]
print('bbox in cams:', select_cams)
# Plot LIDAR view.
lidar = sample_record['data']['LIDAR_TOP']
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[0], view=view, colors=(c, c, c))
corners = view_points(boxes[0].corners(), view, False)[:2, :]
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
axes[0].axis('off')
axes[0].set_aspect('equal')
# Plot CAMERA view.
for i in range(1, num_cam + 1):
cam = select_cams[i - 1]
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
im = Image.open(data_path)
axes[i].imshow(im)
axes[i].set_title(nusc.get('sample_data', cam)['channel'])
axes[i].axis('off')
axes[i].set_aspect('equal')
for box in boxes:
c = np.array(get_color(box.name)) / 255.0
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
axes[i].set_ylim(im.size[1], 0)
if extra_info:
rcParams['font.family'] = 'monospace'
w, l, h = ann_record['size']
category = ann_record['category_name']
lidar_points = ann_record['num_lidar_pts']
radar_points = ann_record['num_radar_pts']
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points),
'# radar points: {0:>4}'.format(radar_points),
'',
'distance: {:>7.3f}m'.format(dist),
'',
'width: {:>7.3f}m'.format(w),
'length: {:>7.3f}m'.format(l),
'height: {:>7.3f}m'.format(h)])
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
if out_path is not None:
plt.savefig(out_path)
def get_sample_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False):
"""
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(nusc.get_box, selected_anntokens))
else:
boxes = nusc.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
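# A minimal usage sketch for get_sample_data (illustrative only): it assumes the global
# `nusc` object has already been constructed, as in the __main__ block below, and simply
# reports how many boxes survive the visibility filter for one sample_data token.
def _example_get_sample_data(sample_data_token):
    data_path, boxes, cam_intrinsic = get_sample_data(
        sample_data_token, box_vis_level=BoxVisibility.ANY)
    print('{} box(es) in the sensor frame for {}'.format(len(boxes), data_path))
    return data_path, boxes, cam_intrinsic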
def get_predicted_data(sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens=None,
use_flat_vehicle_coordinates: bool = False,
pred_anns=None
):
"""
Returns the data path as well as all predicted boxes related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: Unused here; kept for interface compatibility with get_sample_data.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:param pred_anns: List of predicted Box objects to be transformed into the sensor coordinate frame.
:return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
sd_record = nusc.get('sample_data', sample_data_token)
cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = nusc.get('sensor', cs_record['sensor_token'])
pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
data_path = nusc.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
else:
cam_intrinsic = None
imsize = None
# Unlike get_sample_data, the boxes come from the caller-supplied predictions
# rather than from the ground-truth sample annotations.
boxes = pred_anns
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
else:
# Move box to ego vehicle coord system.
box.translate(-np.array(pose_record['translation']))
box.rotate(Quaternion(pose_record['rotation']).inverse)
# Move box to sensor coord system.
box.translate(-np.array(cs_record['translation']))
box.rotate(Quaternion(cs_record['rotation']).inverse)
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
continue
box_list.append(box)
return data_path, box_list, cam_intrinsic
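# A minimal usage sketch for get_predicted_data (illustrative only): `result_records` is
# assumed to hold entries in the nuScenes results_nusc.json format, i.e.
# data['results'][sample_token], and the 0.3 score threshold is an arbitrary example value.
def _example_get_predicted_data(sample_data_token, result_records, score_thr=0.3):
    pred_boxes = [Box(r['translation'], r['size'], Quaternion(r['rotation']),
                      name=r['detection_name'], token='predicted')
                  for r in result_records if r['detection_score'] > score_thr]
    # The returned boxes are already transformed into the selected sensor's frame.
    return get_predicted_data(sample_data_token,
                              box_vis_level=BoxVisibility.ANY,
                              pred_anns=pred_boxes)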
def lidar_render(sample_token, data, out_path=None):
bbox_gt_list = []
bbox_pred_list = []
anns = nusc.get('sample', sample_token)['anns']
for ann in anns:
content = nusc.get('sample_annotation', ann)
try:
bbox_gt_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=nusc.box_velocity(content['token'])[:2],
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=category_to_detection_name(content['category_name']),
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=''))
except Exception:
# Skip annotations that cannot be converted to a DetectionBox
# (e.g. categories without a detection-class mapping).
pass
bbox_anns = data['results'][sample_token]
for content in bbox_anns:
bbox_pred_list.append(DetectionBox(
sample_token=content['sample_token'],
translation=tuple(content['translation']),
size=tuple(content['size']),
rotation=tuple(content['rotation']),
velocity=tuple(content['velocity']),
ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
else tuple(content['ego_translation']),
num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
detection_name=content['detection_name'],
detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
attribute_name=content['attribute_name']))
gt_annotations = EvalBoxes()
pred_annotations = EvalBoxes()
gt_annotations.add_boxes(sample_token, bbox_gt_list)
pred_annotations.add_boxes(sample_token, bbox_pred_list)
print('green is ground truth')
print('blue is the predicted result')
visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path + '_bev')
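# A minimal usage sketch for lidar_render (illustrative only): 'results_nusc.json' stands in
# for any detection results file in the nuScenes submission format, and the global `nusc`
# object must already exist. It writes a single BEV comparison figure to '<out_path>_bev'.
def _example_lidar_render(results_json, sample_token, out_path='bev_demo'):
    data = mmcv.load(results_json)
    lidar_render(sample_token, data, out_path=out_path)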
def get_color(category_name: str):
"""
Provides the default colors based on the category names.
This method works for the general nuScenes categories, as well as the nuScenes detection categories.
"""
a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack',
'vehicle.bicycle',
'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction', 'vehicle.emergency.ambulance',
'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer', 'vehicle.truck', 'flat.driveable_surface',
'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade', 'static.other', 'static.vegetation',
'vehicle.ego']
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
# print(category_name)
if category_name == 'bicycle':
return nusc.colormap['vehicle.bicycle']
elif category_name == 'construction_vehicle':
return nusc.colormap['vehicle.construction']
elif category_name == 'traffic_cone':
return nusc.colormap['movable_object.trafficcone']
for key in nusc.colormap.keys():
if category_name in key:
return nusc.colormap[key]
return [0, 0, 0]
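# A small illustrative helper for get_color: the returned values are 0-255 RGB, so the
# rendering code in this file rescales them to the 0-1 range that matplotlib expects.
def _example_get_color(category_name='traffic_cone'):
    rgb = np.array(get_color(category_name)) / 255.0
    print('{} -> RGB in [0, 1]: {}'.format(category_name, rgb))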
def render_sample_data(
sample_token: str,
with_anns: bool = True,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
axes_limit: float = 40,
ax=None,
nsweeps: int = 1,
out_path: str = None,
underlay_map: bool = True,
use_flat_vehicle_coordinates: bool = True,
show_lidarseg: bool = False,
show_lidarseg_legend: bool = False,
filter_lidarseg_labels=None,
lidarseg_preds_bin_path: str = None,
verbose: bool = True,
show_panoptic: bool = False,
pred_data=None,
) -> None:
"""
Render sample data onto axis.
:param sample_token: Sample token.
:param with_anns: Whether to draw box annotations.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param axes_limit: Axes limit for lidar and radar (measured in meters).
:param ax: Axes onto which to render.
:param nsweeps: Number of sweeps for lidar and radar.
:param out_path: Optional path to save the rendered figure to disk.
:param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
setting is more correct and rotates the plot by ~90 degrees.
:param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
:param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
:param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
or the list is empty, all classes will be displayed.
:param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
predictions for the sample.
:param verbose: Whether to display the image after it is rendered.
:param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
If show_lidarseg is True, show_panoptic will be set to False.
:param pred_data: Detection results dict in the nuScenes results_nusc.json submission format.
"""
lidar_render(sample_token, pred_data, out_path=out_path)
sample = nusc.get('sample', sample_token)
# sample = data['results'][sample_token_list[0]][0]
cams = [
'CAM_FRONT_LEFT',
'CAM_FRONT',
'CAM_FRONT_RIGHT',
'CAM_BACK_LEFT',
'CAM_BACK',
'CAM_BACK_RIGHT',
]
if ax is None:
_, ax = plt.subplots(4, 3, figsize=(24, 18))
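# Figure layout: rows 0-1 of the 4x3 grid show the six camera views with predicted boxes,
# rows 2-3 repeat the same images with the ground-truth boxes.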
j = 0
for ind, cam in enumerate(cams):
sample_data_token = sample['data'][cam]
sd_record = nusc.get('sample_data', sample_data_token)
sensor_modality = sd_record['sensor_modality']
if sensor_modality in ['lidar', 'radar']:
assert False, 'only camera channels are rendered here; the lidar BEV is handled by lidar_render'
elif sensor_modality == 'camera':
# Load boxes and image.
boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
name=record['detection_name'], token='predicted') for record in
pred_data['results'][sample_token] if record['detection_score'] > 0.2]
data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
box_vis_level=box_vis_level, pred_anns=boxes)
_, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
if ind == 3:
j += 1
ind = ind % 3
data = Image.open(data_path)
# mmcv.imwrite(np.array(data)[:,:,::-1], f'{cam}.png')
# Init axes.
# Show image.
ax[j, ind].imshow(data)
ax[j + 2, ind].imshow(data)
# Show boxes.
if with_anns:
for box in boxes_pred:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
for box in boxes_gt:
c = np.array(get_color(box.name)) / 255.0
box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
# Limit visible range.
ax[j, ind].set_xlim(0, data.size[0])
ax[j, ind].set_ylim(data.size[1], 0)
ax[j + 2, ind].set_xlim(0, data.size[0])
ax[j + 2, ind].set_ylim(data.size[1], 0)
else:
raise ValueError('Error: Unknown sensor modality!')
ax[j, ind].axis('off')
ax[j, ind].set_title('PRED: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j, ind].set_aspect('equal')
ax[j + 2, ind].axis('off')
ax[j + 2, ind].set_title('GT: {} {labels_type}'.format(
sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
ax[j + 2, ind].set_aspect('equal')
if out_path is not None:
plt.savefig(out_path + '_camera', bbox_inches='tight', pad_inches=0, dpi=200)
if verbose:
plt.show()
plt.close()
if __name__ == '__main__':
nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
# render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
sample_token_list = list(bevformer_results['results'].keys())
for idx in range(10):
render_sample_data(sample_token_list[idx], pred_data=bevformer_results, out_path=sample_token_list[idx])
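# Each iteration writes two figures named after the sample token: a BEV ground-truth vs.
# prediction plot ('<sample_token>_bev', saved via visualize_sample) and a camera grid
# ('<sample_token>_camera', saved by render_sample_data).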