Commit 41b18fd8 authored by zhe chen

Use pre-commit to reformat code


parent ff20ea39
@@ -2,6 +2,7 @@ import math
import torch
from torch.utils.data import DistributedSampler as _DistributedSampler

from .sampler import SAMPLER
@@ -33,9 +34,9 @@ class DistributedSampler(_DistributedSampler):
        assert len(indices) == self.total_size
        # subsample
        per_replicas = self.total_size // self.num_replicas
        # indices = indices[self.rank:self.total_size:self.num_replicas]
        indices = indices[self.rank * per_replicas:(self.rank + 1) * per_replicas]
        assert len(indices) == self.num_samples
        return iter(indices)
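For comparison, here is a minimal sketch, not part of the commit and with illustrative values, contrasting the stock strided subsampling (the commented-out line above) with the contiguous per-replica slice this sampler keeps:

# Illustrative only: 8 indices split across 2 replicas, rank 0.
indices = list(range(8))
num_replicas, rank = 2, 0
per_replicas = len(indices) // num_replicas
strided = indices[rank::num_replicas]                                # [0, 2, 4, 6]
contiguous = indices[rank * per_replicas:(rank + 1) * per_replicas]  # [0, 1, 2, 3]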
# Copyright (c) OpenMMLab. All rights reserved.
import math
@@ -6,9 +5,8 @@ import numpy as np
import torch
from mmcv.runner import get_dist_info
from torch.utils.data import Sampler
from .sampler import SAMPLER
import random
from IPython import embed
@SAMPLER.register_module()
@@ -107,4 +105,3 @@ class DistributedGroupSampler(Sampler):

    def set_epoch(self, epoch):
        self.epoch = epoch
from mmcv.utils.registry import Registry, build_from_cfg

SAMPLER = Registry('sampler')


def build_sampler(cfg, default_args):
    return build_from_cfg(cfg, SAMPLER, default_args)
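A hedged usage sketch of the registry above: build_from_cfg looks up the `type` key in SAMPLER and forwards the remaining keys as kwargs. The sampler type and the `dataset` placeholder are illustrative:

# Illustrative only: 'DistributedSampler' must have been registered with
# @SAMPLER.register_module(); `dataset` is an assumed placeholder.
cfg = dict(type='DistributedSampler', shuffle=True)
sampler = build_sampler(cfg, default_args=dict(dataset=dataset, num_replicas=8, rank=0))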
from .vovnet import VoVNet

__all__ = ['VoVNet']
\ No newline at end of file
from collections import OrderedDict

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import BaseModule
from mmdet.models.builder import BACKBONES
from torch.nn.modules.batchnorm import _BatchNorm
VoVNet19_slim_dw_eSE = {
    'stem': [64, 64, 64],
    'stage_conv_ch': [64, 80, 96, 112],
    'stage_out_ch': [112, 256, 384, 512],
    'layer_per_block': 3,
    'block_per_stage': [1, 1, 1, 1],
    'eSE': True,
    'dw': True
}

VoVNet19_dw_eSE = {
    'stem': [64, 64, 64],
    'stage_conv_ch': [128, 160, 192, 224],
    'stage_out_ch': [256, 512, 768, 1024],
    'layer_per_block': 3,
    'block_per_stage': [1, 1, 1, 1],
    'eSE': True,
    'dw': True
}

VoVNet19_slim_eSE = {
@@ -35,57 +34,57 @@ VoVNet19_slim_eSE = {
    'layer_per_block': 3,
    'block_per_stage': [1, 1, 1, 1],
    'eSE': True,
    'dw': False
}

VoVNet19_eSE = {
    'stem': [64, 64, 128],
    'stage_conv_ch': [128, 160, 192, 224],
    'stage_out_ch': [256, 512, 768, 1024],
    'layer_per_block': 3,
    'block_per_stage': [1, 1, 1, 1],
    'eSE': True,
    'dw': False
}

VoVNet39_eSE = {
    'stem': [64, 64, 128],
    'stage_conv_ch': [128, 160, 192, 224],
    'stage_out_ch': [256, 512, 768, 1024],
    'layer_per_block': 5,
    'block_per_stage': [1, 1, 2, 2],
    'eSE': True,
    'dw': False
}

VoVNet57_eSE = {
    'stem': [64, 64, 128],
    'stage_conv_ch': [128, 160, 192, 224],
    'stage_out_ch': [256, 512, 768, 1024],
    'layer_per_block': 5,
    'block_per_stage': [1, 1, 4, 3],
    'eSE': True,
    'dw': False
}

VoVNet99_eSE = {
    'stem': [64, 64, 128],
    'stage_conv_ch': [128, 160, 192, 224],
    'stage_out_ch': [256, 512, 768, 1024],
    'layer_per_block': 5,
    'block_per_stage': [1, 3, 9, 3],
    'eSE': True,
    'dw': False
}

_STAGE_SPECS = {
    'V-19-slim-dw-eSE': VoVNet19_slim_dw_eSE,
    'V-19-dw-eSE': VoVNet19_dw_eSE,
    'V-19-slim-eSE': VoVNet19_slim_eSE,
    'V-19-eSE': VoVNet19_eSE,
    'V-39-eSE': VoVNet39_eSE,
    'V-57-eSE': VoVNet57_eSE,
    'V-99-eSE': VoVNet99_eSE,
}
@@ -117,7 +116,7 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
    """3x3 convolution with padding"""
    return [
        (
            f'{module_name}_{postfix}/conv',
            nn.Conv2d(
                in_channels,
                out_channels,
@@ -128,8 +127,8 @@ def conv3x3(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
                bias=False,
            ),
        ),
        (f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
        (f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
    ]
@@ -137,7 +136,7 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
    """1x1 convolution with padding"""
    return [
        (
            f'{module_name}_{postfix}/conv',
            nn.Conv2d(
                in_channels,
                out_channels,
@@ -148,8 +147,8 @@ def conv1x1(in_channels, out_channels, module_name, postfix, stride=1, groups=1,
                bias=False,
            ),
        ),
        (f'{module_name}_{postfix}/norm', nn.BatchNorm2d(out_channels)),
        (f'{module_name}_{postfix}/relu', nn.ReLU(inplace=True)),
    ]
@@ -179,7 +178,7 @@ class eSEModule(nn.Module):
class _OSA_module(nn.Module):
    def __init__(
        self, in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE=False, identity=False, depthwise=False
    ):
        super(_OSA_module, self).__init__()
@@ -192,7 +191,7 @@ class _OSA_module(nn.Module):
        if self.depthwise and in_channel != stage_ch:
            self.isReduced = True
            self.conv_reduction = nn.Sequential(
                OrderedDict(conv1x1(in_channel, stage_ch, '{}_reduction'.format(module_name), '0'))
            )
        for i in range(layer_per_block):
            if self.depthwise:
@@ -203,7 +202,7 @@ class _OSA_module(nn.Module):
        # feature aggregation
        in_channel = in_ch + layer_per_block * stage_ch
        self.concat = nn.Sequential(OrderedDict(conv1x1(in_channel, concat_ch, module_name, 'concat')))

        self.ese = eSEModule(concat_ch)
@@ -232,24 +231,24 @@ class _OSA_module(nn.Module):
class _OSA_stage(nn.Sequential):
    def __init__(
        self, in_ch, stage_ch, concat_ch, block_per_stage, layer_per_block, stage_num, SE=False, depthwise=False
    ):
        super(_OSA_stage, self).__init__()

        if not stage_num == 2:
            self.add_module('Pooling', nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True))

        if block_per_stage != 1:
            SE = False
        module_name = f'OSA{stage_num}_1'
        self.add_module(
            module_name, _OSA_module(in_ch, stage_ch, concat_ch, layer_per_block, module_name, SE, depthwise=depthwise)
        )
        for i in range(block_per_stage - 1):
            if i != block_per_stage - 2:  # last block
                SE = False
            module_name = f'OSA{stage_num}_{i + 2}'
            self.add_module(
                module_name,
                _OSA_module(
@@ -267,7 +266,7 @@ class _OSA_stage(nn.Sequential):
@BACKBONES.register_module()
class VoVNet(BaseModule):
    def __init__(self, spec_name, input_ch=3, out_features=None,
                 frozen_stages=-1, norm_eval=True, pretrained=None, init_cfg=None):
        """
        Args:
@@ -285,32 +284,32 @@ class VoVNet(BaseModule):
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)

        stage_specs = _STAGE_SPECS[spec_name]

        stem_ch = stage_specs['stem']
        config_stage_ch = stage_specs['stage_conv_ch']
        config_concat_ch = stage_specs['stage_out_ch']
        block_per_stage = stage_specs['block_per_stage']
        layer_per_block = stage_specs['layer_per_block']
        SE = stage_specs['eSE']
        depthwise = stage_specs['dw']

        self._out_features = out_features

        # Stem module
        conv_type = dw_conv3x3 if depthwise else conv3x3
        stem = conv3x3(input_ch, stem_ch[0], 'stem', '1', 2)
        stem += conv_type(stem_ch[0], stem_ch[1], 'stem', '2', 1)
        stem += conv_type(stem_ch[1], stem_ch[2], 'stem', '3', 2)
        self.add_module('stem', nn.Sequential((OrderedDict(stem))))
        current_stirde = 4
        self._out_feature_strides = {'stem': current_stirde, 'stage2': current_stirde}
        self._out_feature_channels = {'stem': stem_ch[2]}

        stem_out_ch = [stem_ch[2]]
        in_ch_list = stem_out_ch + config_concat_ch[:-1]
        # OSA stages
        self.stage_names = []
        for i in range(4):  # num_stages
            name = 'stage%d' % (i + 2)  # stage 2 ... stage 5
            self.stage_names.append(name)
            self.add_module(
                name,
@@ -341,8 +340,8 @@ class VoVNet(BaseModule):
    def forward(self, x):
        outputs = {}
        x = self.stem(x)
        if 'stem' in self._out_features:
            outputs['stem'] = x
        for name in self.stage_names:
            x = getattr(self, name)(x)
            if name in self._out_features:
@@ -358,7 +357,7 @@ class VoVNet(BaseModule):
                param.requires_grad = False

        for i in range(1, self.frozen_stages + 1):
            m = getattr(self, f'stage{i + 1}')
            m.eval()
            for param in m.parameters():
                param.requires_grad = False
@@ -372,4 +371,4 @@ class VoVNet(BaseModule):
            for m in self.modules():
                # trick: eval have effect on BatchNorm only
                if isinstance(m, _BatchNorm):
                    m.eval()
\ No newline at end of file
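A hedged smoke test for the backbone above; the spec name comes from _STAGE_SPECS and the input shape is illustrative:

# Illustrative only: forward() returns a dict keyed by the requested stages.
import torch
model = VoVNet('V-99-eSE', out_features=['stage2', 'stage3', 'stage4', 'stage5'])
feats = model(torch.randn(1, 3, 224, 224))
print({name: tuple(feat.shape) for name, feat in feats.items()})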
from .hooks import GradChecker
\ No newline at end of file
from mmcv.runner.hooks.hook import HOOKS, Hook
from projects.mmdet3d_plugin.models.utils import run_time
@HOOKS.register_module()
@@ -7,7 +6,5 @@ class GradChecker(Hook):

    def after_train_iter(self, runner):
        for key, val in runner.model.named_parameters():
            if val.grad is None and val.requires_grad:
                print('WARNNING: {key}\'s parameters are not be used!!!!'.format(key=key))
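A hedged config snippet showing how a hook registered this way is typically enabled in an mmcv-style config; the priority value is illustrative:

# Illustrative only: goes in the training config.
custom_hooks = [dict(type='GradChecker', priority='HIGHEST')]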
from .adamw import AdamW2
\ No newline at end of file
@@ -4,8 +4,9 @@ except:
    print('WARNING!!!, I recommend using torch>=1.8')
import torch
from mmcv.runner.optimizer.builder import OPTIMIZERS
from torch.optim.optimizer import Optimizer
@OPTIMIZERS.register_module()
class AdamW2(Optimizer):
@@ -38,15 +39,15 @@ class AdamW2(Optimizer):
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=1e-2, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError('Invalid learning rate: {}'.format(lr))
        if not 0.0 <= eps:
            raise ValueError('Invalid epsilon value: {}'.format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError('Invalid beta parameter at index 0: {}'.format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError('Invalid beta parameter at index 1: {}'.format(betas[1]))
        if not 0.0 <= weight_decay:
            raise ValueError('Invalid weight_decay value: {}'.format(weight_decay))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay, amsgrad=amsgrad)
        super(AdamW2, self).__init__(params, defaults)
@@ -109,7 +110,6 @@ class AdamW2(Optimizer):
                if amsgrad:
                    max_exp_avg_sqs.append(state['max_exp_avg_sq'])

                # update the steps for each param group update
                state['step'] += 1
                # record the step after step update
@@ -128,4 +128,4 @@ class AdamW2(Optimizer):
                group['weight_decay'],
                group['eps'])
        return loss
\ No newline at end of file
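A hedged usage sketch of the optimizer above; `model` is an assumed nn.Module:

# Illustrative only: AdamW2 mirrors torch.optim.AdamW's constructor.
optimizer = AdamW2(model.parameters(), lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-2)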
from .bricks import run_time
from .grid_mask import GridMask
from .position_embedding import RelPositionEmbedding
from .positional_encoding import LearnedPositionalEncoding3D
from .visual import save_tensor
\ No newline at end of file
import functools
import time
from collections import defaultdict

import torch

time_maps = defaultdict(lambda: 0.)
count_maps = defaultdict(lambda: 0.)


def run_time(name):
    def middle(fn):
        def wrapper(*args, **kwargs):
@@ -11,10 +14,12 @@ def run_time(name):
            start = time.time()
            res = fn(*args, **kwargs)
            torch.cuda.synchronize()
            time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start
            count_maps['%s : %s' % (name, fn.__name__)] += 1
            print('%s : %s takes up %f ' % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps[
                '%s : %s' % (name, fn.__name__)]))
            return res
        return wrapper
    return middle
\ No newline at end of file
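A hedged usage sketch of the run_time decorator above; it needs a CUDA device because the wrapper calls torch.cuda.synchronize():

# Illustrative only: prints the running mean wall time of each decorated call.
@run_time('demo')
def matmul(a, b):
    return a @ b

x = torch.randn(1024, 1024, device='cuda')
matmul(x, x)  # -> 'demo : matmul takes up ...'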
import numpy as np
import torch
import torch.nn as nn
from mmcv.runner import auto_fp16
from PIL import Image


class Grid(object):
    def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
        self.use_h = use_h
        self.use_w = use_w
        self.rotate = rotate
        self.offset = offset
        self.ratio = ratio
        self.mode = mode
        self.st_prob = prob
        self.prob = prob
@@ -25,50 +26,50 @@ class Grid(object):
        w = img.size(2)
        self.d1 = 2
        self.d2 = min(h, w)
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(self.d1, self.d2)
        if self.ratio == 1:
            self.l = np.random.randint(1, d)
        else:
            self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]

        mask = torch.from_numpy(mask).float()
        if self.mode == 1:
            mask = 1 - mask
        mask = mask.expand_as(img)
        if self.offset:
            offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).float()
            offset = (1 - mask) * offset
            img = img * mask + offset
        else:
            img = img * mask

        return img, label
class GridMask(nn.Module):
    def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
        super(GridMask, self).__init__()
        self.use_h = use_h
        self.use_w = use_w
@@ -79,46 +80,48 @@ class GridMask(nn.Module):
        self.st_prob = prob
        self.prob = prob
        self.fp16_enable = False

    def set_prob(self, epoch, max_epoch):
        self.prob = self.st_prob * epoch / max_epoch  # + 1.#0.5
    @auto_fp16()
    def forward(self, x):
        if np.random.rand() > self.prob or not self.training:
            return x
        n, c, h, w = x.size()
        x = x.view(-1, h, w)
        hh = int(1.5 * h)
        ww = int(1.5 * w)
        d = np.random.randint(2, h)
        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
        mask = np.ones((hh, ww), np.float32)
        st_h = np.random.randint(d)
        st_w = np.random.randint(d)
        if self.use_h:
            for i in range(hh // d):
                s = d * i + st_h
                t = min(s + self.l, hh)
                mask[s:t, :] *= 0
        if self.use_w:
            for i in range(ww // d):
                s = d * i + st_w
                t = min(s + self.l, ww)
                mask[:, s:t] *= 0

        r = np.random.randint(self.rotate)
        mask = Image.fromarray(np.uint8(mask))
        mask = mask.rotate(r)
        mask = np.asarray(mask)
        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) // 2 + w]

        mask = torch.from_numpy(mask).to(x.dtype).cuda()
        if self.mode == 1:
            mask = 1 - mask
        mask = mask.expand_as(x)
        if self.offset:
            offset = torch.from_numpy(2 * (np.random.rand(h, w) - 0.5)).to(x.dtype).cuda()
            x = x * mask + offset * (1 - mask)
        else:
            x = x * mask
        return x.view(n, c, h, w)
\ No newline at end of file
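A hedged usage sketch of GridMask above; CUDA is required because forward() builds the mask with .cuda(), and the shapes and schedule are illustrative:

# Illustrative only: masking fires with probability self.prob and only in
# training mode.
gm = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
gm.set_prob(epoch=10, max_epoch=24)
imgs = torch.randn(2, 3, 224, 224, device='cuda')
out = gm.train()(imgs)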
import math

import torch
import torch.nn as nn
class RelPositionEmbedding(nn.Module):
    def __init__(self, num_pos_feats=64, pos_norm=True):
        super().__init__()
        self.num_pos_feats = num_pos_feats
        self.fc = nn.Linear(4, self.num_pos_feats, bias=False)
        # nn.init.orthogonal_(self.fc.weight)
        # self.fc.weight.requires_grad = False
        self.pos_norm = pos_norm
        if self.pos_norm:
            self.norm = nn.LayerNorm(self.num_pos_feats)

    def forward(self, tensor):
        # mask = nesttensor.mask
        B, C, H, W = tensor.shape
        # print('tensor.shape', tensor.shape)
        y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
        # y_axis = torch.stack((y_range, 1-y_range),dim=1)
        y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
        y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
        x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
        # x_axis =torch.stack((x_range,1-x_range),dim=1)
        x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
        x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
        x_pos = torch.cat((y_axis, x_axis), dim=1)
@@ -30,5 +33,5 @@ class RelPositionEmbedding(nn.Module):
        if self.pos_norm:
            x_pos = self.norm(x_pos)
        # print('xpos,', x_pos.max(),x_pos.min())
        return x_pos
\ No newline at end of file
import torch
import torch.nn as nn
from mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING
from mmcv.runner import BaseModule


@POSITIONAL_ENCODING.register_module()
class LearnedPositionalEncoding3D(BaseModule):
    """Position embedding with learnable embedding weights.

    Args:
        num_feats (int): The feature dimension for each position
            along x-axis or y-axis. The final returned dimension for
            each position is 2 times of this value.
        row_num_embed (int, optional): The dictionary size of row embeddings.
            Default 50.
        col_num_embed (int, optional): The dictionary size of col embeddings.
            Default 50.
        init_cfg (dict or list[dict], optional): Initialization config dict.
    """

    def __init__(self,
                 num_feats,
                 row_num_embed=50,
                 col_num_embed=50,
                 height_num_embed=50,
                 init_cfg=dict(type='Uniform', layer='Embedding')):
        super(LearnedPositionalEncoding3D, self).__init__(init_cfg)
        self.row_embed = nn.Embedding(row_num_embed, num_feats)
        self.col_embed = nn.Embedding(col_num_embed, num_feats)
        self.height_embed = nn.Embedding(height_num_embed, num_feats)
        self.num_feats = num_feats
        self.row_num_embed = row_num_embed
        self.col_num_embed = col_num_embed
        self.height_num_embed = height_num_embed

    def forward(self, mask):
        """Forward function for `LearnedPositionalEncoding`.

        Args:
            mask (Tensor): ByteTensor mask. Non-zero values representing
                ignored positions, while zero values means valid positions
                for this image. Shape [bs, h, w].

        Returns:
            pos (Tensor): Returned position embedding with shape
                [bs, num_feats*2, h, w].
        """
        l, h, w = mask.shape[-3:]
        x = torch.arange(w, device=mask.device)
        y = torch.arange(h, device=mask.device)
        z = torch.arange(l, device=mask.device)
        x_embed = self.col_embed(x)
        y_embed = self.row_embed(y)
        z_embed = self.height_embed(z)
        pos = torch.cat(
            (x_embed.unsqueeze(0).unsqueeze(0).repeat(l, h, 1, 1),
             y_embed.unsqueeze(1).unsqueeze(0).repeat(l, 1, w, 1),
             z_embed.unsqueeze(1).unsqueeze(1).repeat(1, h, w, 1)), dim=-1).permute(3, 0, 1, 2).unsqueeze(0).repeat(
            mask.shape[0], 1, 1, 1, 1)
        return pos

    def __repr__(self):
        """str: a string that describes the module"""
        repr_str = self.__class__.__name__
        repr_str += f'(num_feats={self.num_feats}, '
        repr_str += f'row_num_embed={self.row_num_embed}, '
        repr_str += f'col_num_embed={self.col_num_embed}, '
        repr_str += f'height_num_embed={self.height_num_embed})'
        return repr_str
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision.utils import make_grid
def convert_color(img_path):
@@ -12,11 +12,11 @@ def convert_color(img_path):
    plt.close()


def save_tensor(tensor, path, pad_value=254.0, ):
    print('save_tensor', path)
    tensor = tensor.to(torch.float).detach().cpu()
    if tensor.type() == 'torch.BoolTensor':
        tensor = tensor * 255
    if len(tensor.shape) == 3:
        tensor = tensor.unsqueeze(1)
    tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
...
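A hedged usage sketch of save_tensor above (the path is illustrative): make_grid tiles the batch into a single image grid, which the truncated remainder of the function presumably writes to `path`:

# Illustrative only.
save_tensor(torch.rand(8, 3, 64, 64), './debug_grid.png')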
@@ -3,31 +3,30 @@
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
from __future__ import division

import argparse
import copy
import os
import time
import warnings
from os import path as osp

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist
from mmcv.utils import TORCH_VERSION, digit_version
from mmdet import __version__ as mmdet_version
from mmdet3d import __version__ as mmdet3d_version
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_model
from mmdet3d.utils import collect_env, get_root_logger
from mmdet.apis import set_random_seed
from mmseg import __version__ as mmseg_version

# from mmdet3d.apis import train_model
def parse_args():
@@ -45,13 +44,13 @@ def parse_args():
        '--gpus',
        type=int,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument(
        '--deterministic',
@@ -62,18 +61,18 @@ def parse_args():
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file (deprecate), '
        'change to --cfg-options instead.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. If the value to '
        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
        'Note that the quotation marks are necessary and that no white space '
        'is allowed.')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
@@ -134,7 +133,8 @@ def main():
    print(_module_path)
    plg_lib = importlib.import_module(_module_path)

    from projects.mmdet3d_plugin.bevformer.apis.train import \
        custom_train_model

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
@@ -155,7 +155,7 @@ def main():
    else:
        cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)
    if digit_version(TORCH_VERSION) == digit_version('1.8.1') and cfg.optimizer['type'] == 'AdamW':
        cfg.optimizer['type'] = 'AdamW2'  # fix bug in Adamw
    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
...
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
from collections import defaultdict

import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
@@ -65,7 +66,7 @@ def plot_curve(log_dicts, args):
        else:
            # find the first epoch that do eval
            x0 = min(epochs) + args.interval - \
                min(epochs) % args.interval
            xs = np.arange(x0, max(epochs) + 1, args.interval)
            ys = []
            for epoch in epochs[args.interval - 1::args.interval]:
@@ -85,7 +86,7 @@ def plot_curve(log_dicts, args):
            xs = []
            ys = []
            num_iters_per_epoch = \
                log_dict[epochs[args.interval - 1]]['iter'][-1]
            for epoch in epochs[args.interval - 1::args.interval]:
                iters = log_dict[epoch]['iter']
                if log_dict[epoch]['mode'][-1] == 'val':
@@ -152,7 +153,7 @@ def add_time_parser(subparsers):
        '--include-outliers',
        action='store_true',
        help='include the first value of every epoch when computing '
        'the average time')
def parse_args():
...
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import sys
import time

import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model

sys.path.append('.')

# from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
from projects.mmdet3d_plugin.datasets import custom_build_dataset
from projects.mmdet3d_plugin.datasets.builder import build_dataloader

# from tools.misc.fuse_conv_bn import fuse_module
def parse_args():
@@ -25,7 +28,7 @@ def parse_args():
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    args = parser.parse_args()
    return args
@@ -59,7 +62,7 @@ def main():
        wrap_fp16_model(model)
    if args.checkpoint is not None:
        load_checkpoint(model, args.checkpoint, map_location='cpu')
    # if args.fuse_conv_bn:
    #     model = fuse_module(model)
    model = MMDataParallel(model, device_ids=[0])
...
import torch

file_path = './ckpts/bevformer_v4.pth'
model = torch.load(file_path, map_location='cpu')
all = 0
for key in list(model['state_dict'].keys()):
    all += model['state_dict'][key].nelement()
print(all)

# smaller 63374123
# v4 69140395
# Based on https://github.com/nutonomy/nuscenes-devkit
# ---------------------------------------------
# Modified by Zhiqi Li
# ---------------------------------------------
import mmcv import mmcv
from nuscenes.nuscenes import NuScenes from nuscenes.eval.common.data_classes import EvalBoxes
from PIL import Image from nuscenes.eval.detection.data_classes import DetectionBox
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix from nuscenes.eval.detection.render import visualize_sample
from typing import Tuple, List, Iterable from nuscenes.eval.detection.utils import category_to_detection_name
import matplotlib.pyplot as plt from nuscenes.nuscenes import NuScenes
import numpy as np from nuscenes.utils.geometry_utils import (BoxVisibility, box_in_image,
from PIL import Image view_points)
from matplotlib import rcParams from PIL import Image
from matplotlib.axes import Axes from pyquaternion import Quaternion
from pyquaternion import Quaternion
from PIL import Image cams = ['CAM_FRONT',
from matplotlib import rcParams 'CAM_FRONT_RIGHT',
from matplotlib.axes import Axes 'CAM_BACK_RIGHT',
from pyquaternion import Quaternion 'CAM_BACK',
from tqdm import tqdm 'CAM_BACK_LEFT',
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box 'CAM_FRONT_LEFT']
from nuscenes.utils.geometry_utils import view_points, box_in_image, BoxVisibility, transform_matrix
from nuscenes.eval.common.data_classes import EvalBoxes, EvalBox import matplotlib.pyplot as plt
from nuscenes.eval.detection.data_classes import DetectionBox import numpy as np
from nuscenes.eval.detection.utils import category_to_detection_name from matplotlib import rcParams
from nuscenes.eval.detection.render import visualize_sample from nuscenes.utils.data_classes import Box, LidarPointCloud
from PIL import Image
def render_annotation(
cams = ['CAM_FRONT', anntoken: str,
'CAM_FRONT_RIGHT', margin: float = 10,
'CAM_BACK_RIGHT', view: np.ndarray = np.eye(4),
'CAM_BACK', box_vis_level: BoxVisibility = BoxVisibility.ANY,
'CAM_BACK_LEFT', out_path: str = 'render.png',
'CAM_FRONT_LEFT'] extra_info: bool = False) -> None:
"""
import numpy as np Render selected annotation.
import matplotlib.pyplot as plt :param anntoken: Sample_annotation token.
from nuscenes.utils.data_classes import LidarPointCloud, RadarPointCloud, Box :param margin: How many meters in each direction to include in LIDAR view.
from PIL import Image :param view: LIDAR view point.
from matplotlib import rcParams :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param out_path: Optional path to save the rendered figure to disk.
:param extra_info: Whether to render extra information below camera view.
def render_annotation( """
anntoken: str, ann_record = nusc.get('sample_annotation', anntoken)
margin: float = 10, sample_record = nusc.get('sample', ann_record['sample_token'])
view: np.ndarray = np.eye(4), assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.'
box_vis_level: BoxVisibility = BoxVisibility.ANY,
out_path: str = 'render.png', # Figure out which camera the object is fully visible in (this may return nothing).
extra_info: bool = False) -> None: boxes, cam = [], []
""" cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
Render selected annotation. all_bboxes = []
:param anntoken: Sample_annotation token. select_cams = []
:param margin: How many meters in each direction to include in LIDAR view. for cam in cams:
:param view: LIDAR view point. _, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level,
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes. selected_anntokens=[anntoken])
:param out_path: Optional path to save the rendered figure to disk. if len(boxes) > 0:
:param extra_info: Whether to render extra information below camera view. all_bboxes.append(boxes)
""" select_cams.append(cam)
ann_record = nusc.get('sample_annotation', anntoken) # We found an image that matches. Let's abort.
sample_record = nusc.get('sample', ann_record['sample_token']) # assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \
assert 'LIDAR_TOP' in sample_record['data'].keys(), 'Error: No LIDAR_TOP in data, unable to render.' # 'Try using e.g. BoxVisibility.ANY.'
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!'
# Figure out which camera the object is fully visible in (this may return nothing).
boxes, cam = [], [] num_cam = len(all_bboxes)
cams = [key for key in sample_record['data'].keys() if 'CAM' in key]
all_bboxes = [] fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [] select_cams = [sample_record['data'][cam] for cam in select_cams]
for cam in cams: print('bbox in cams:', select_cams)
_, boxes, _ = nusc.get_sample_data(sample_record['data'][cam], box_vis_level=box_vis_level, # Plot LIDAR view.
selected_anntokens=[anntoken]) lidar = sample_record['data']['LIDAR_TOP']
if len(boxes) > 0: data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken])
all_bboxes.append(boxes) LidarPointCloud.from_file(data_path).render_height(axes[0], view=view)
select_cams.append(cam) for box in boxes:
# We found an image that matches. Let's abort. c = np.array(get_color(box.name)) / 255.0
# assert len(boxes) > 0, 'Error: Could not find image where annotation is visible. ' \ box.render(axes[0], view=view, colors=(c, c, c))
# 'Try using e.g. BoxVisibility.ANY.' corners = view_points(boxes[0].corners(), view, False)[:2, :]
# assert len(boxes) < 2, 'Error: Found multiple annotations. Something is wrong!' axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin])
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin])
num_cam = len(all_bboxes) axes[0].axis('off')
axes[0].set_aspect('equal')
fig, axes = plt.subplots(1, num_cam + 1, figsize=(18, 9))
select_cams = [sample_record['data'][cam] for cam in select_cams] # Plot CAMERA view.
print('bbox in cams:', select_cams) for i in range(1, num_cam + 1):
# Plot LIDAR view. cam = select_cams[i - 1]
lidar = sample_record['data']['LIDAR_TOP'] data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken])
data_path, boxes, camera_intrinsic = nusc.get_sample_data(lidar, selected_anntokens=[anntoken]) im = Image.open(data_path)
LidarPointCloud.from_file(data_path).render_height(axes[0], view=view) axes[i].imshow(im)
for box in boxes: axes[i].set_title(nusc.get('sample_data', cam)['channel'])
c = np.array(get_color(box.name)) / 255.0 axes[i].axis('off')
box.render(axes[0], view=view, colors=(c, c, c)) axes[i].set_aspect('equal')
corners = view_points(boxes[0].corners(), view, False)[:2, :] for box in boxes:
axes[0].set_xlim([np.min(corners[0, :]) - margin, np.max(corners[0, :]) + margin]) c = np.array(get_color(box.name)) / 255.0
axes[0].set_ylim([np.min(corners[1, :]) - margin, np.max(corners[1, :]) + margin]) box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c))
axes[0].axis('off')
axes[0].set_aspect('equal') # Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0])
# Plot CAMERA view. axes[i].set_ylim(im.size[1], 0)
for i in range(1, num_cam + 1):
cam = select_cams[i - 1] if extra_info:
data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam, selected_anntokens=[anntoken]) rcParams['font.family'] = 'monospace'
im = Image.open(data_path)
axes[i].imshow(im) w, l, h = ann_record['size']
axes[i].set_title(nusc.get('sample_data', cam)['channel']) category = ann_record['category_name']
axes[i].axis('off') lidar_points = ann_record['num_lidar_pts']
axes[i].set_aspect('equal') radar_points = ann_record['num_radar_pts']
for box in boxes:
c = np.array(get_color(box.name)) / 255.0 sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP'])
box.render(axes[i], view=camera_intrinsic, normalize=True, colors=(c, c, c)) pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation']))
# Print extra information about the annotation below the camera view.
axes[i].set_xlim(0, im.size[0]) information = ' \n'.join(['category: {}'.format(category),
axes[i].set_ylim(im.size[1], 0) '',
'# lidar points: {0:>4}'.format(lidar_points),
if extra_info: '# radar points: {0:>4}'.format(radar_points),
rcParams['font.family'] = 'monospace' '',
'distance: {:>7.3f}m'.format(dist),
w, l, h = ann_record['size'] '',
category = ann_record['category_name'] 'width: {:>7.3f}m'.format(w),
lidar_points = ann_record['num_lidar_pts'] 'length: {:>7.3f}m'.format(l),
radar_points = ann_record['num_radar_pts'] 'height: {:>7.3f}m'.format(h)])
sample_data_record = nusc.get('sample_data', sample_record['data']['LIDAR_TOP']) plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top')
pose_record = nusc.get('ego_pose', sample_data_record['ego_pose_token'])
dist = np.linalg.norm(np.array(pose_record['translation']) - np.array(ann_record['translation'])) if out_path is not None:
plt.savefig(out_path)
information = ' \n'.join(['category: {}'.format(category),
'',
'# lidar points: {0:>4}'.format(lidar_points), def get_sample_data(sample_data_token: str,
'# radar points: {0:>4}'.format(radar_points), box_vis_level: BoxVisibility = BoxVisibility.ANY,
'', selected_anntokens=None,
'distance: {:>7.3f}m'.format(dist), use_flat_vehicle_coordinates: bool = False):
'', """
'width: {:>7.3f}m'.format(w), Returns the data path as well as all annotations related to that sample_data.
'length: {:>7.3f}m'.format(l), Note that the boxes are transformed into the current sensor's coordinate frame.
'height: {:>7.3f}m'.format(h)]) :param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
plt.annotate(information, (0, 0), (0, -20), xycoords='axes fraction', textcoords='offset points', va='top') :param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
if out_path is not None: aligned to z-plane in the world.
plt.savefig(out_path) :return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
"""
# Retrieve sensor & pose records
def get_sample_data(sample_data_token: str,
                    box_vis_level: BoxVisibility = BoxVisibility.ANY,
                    selected_anntokens=None,
                    use_flat_vehicle_coordinates: bool = False):
    """
    Returns the data path as well as all annotations related to that sample_data.
    Note that the boxes are transformed into the current sensor's coordinate frame.
    :param sample_data_token: Sample_data token.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param selected_anntokens: If provided, only return the selected annotations.
    :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
        aligned to z-plane in the world.
    :return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
    """

    # Retrieve sensor & pose records.
    sd_record = nusc.get('sample_data', sample_data_token)
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    data_path = nusc.get_sample_data_path(sample_data_token)

    if sensor_record['modality'] == 'camera':
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
    else:
        cam_intrinsic = None
        imsize = None

    # Retrieve all sample annotations and map to sensor coordinate system.
    if selected_anntokens is not None:
        boxes = list(map(nusc.get_box, selected_anntokens))
    else:
        boxes = nusc.get_boxes(sample_data_token)

    # Make list of Box objects including coord system transforms.
    box_list = []
    for box in boxes:
        if use_flat_vehicle_coordinates:
            # Move box to ego vehicle coord system parallel to world z plane.
            yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
        else:
            # Move box to ego vehicle coord system.
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(pose_record['rotation']).inverse)

            # Move box to sensor coord system.
            box.translate(-np.array(cs_record['translation']))
            box.rotate(Quaternion(cs_record['rotation']).inverse)

        if sensor_record['modality'] == 'camera' and not \
                box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
            continue

        box_list.append(box)

    return data_path, box_list, cam_intrinsic


def get_predicted_data(sample_data_token: str,
                       box_vis_level: BoxVisibility = BoxVisibility.ANY,
                       selected_anntokens=None,
                       use_flat_vehicle_coordinates: bool = False,
                       pred_anns=None
                       ):
    """
    Returns the data path as well as all annotations related to that sample_data.
    Note that the boxes are transformed into the current sensor's coordinate frame.
    :param sample_data_token: Sample_data token.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param selected_anntokens: If provided, only return the selected annotations.
    :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
        aligned to z-plane in the world.
    :param pred_anns: List of predicted Boxes (in the global frame) to transform and filter.
    :return: (data_path, boxes, camera_intrinsic <np.array: 3, 3>)
    """
    # Retrieve sensor & pose records.
    sd_record = nusc.get('sample_data', sample_data_token)
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    sensor_record = nusc.get('sensor', cs_record['sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    data_path = nusc.get_sample_data_path(sample_data_token)

    if sensor_record['modality'] == 'camera':
        cam_intrinsic = np.array(cs_record['camera_intrinsic'])
        imsize = (sd_record['width'], sd_record['height'])
    else:
        cam_intrinsic = None
        imsize = None

    # Use the externally supplied predictions instead of the sample annotations.
    # if selected_anntokens is not None:
    #     boxes = list(map(nusc.get_box, selected_anntokens))
    # else:
    #     boxes = nusc.get_boxes(sample_data_token)
    boxes = pred_anns
    # Make list of Box objects including coord system transforms.
    box_list = []
    for box in boxes:
        if use_flat_vehicle_coordinates:
            # Move box to ego vehicle coord system parallel to world z plane.
            yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
        else:
            # Move box to ego vehicle coord system.
            box.translate(-np.array(pose_record['translation']))
            box.rotate(Quaternion(pose_record['rotation']).inverse)

            # Move box to sensor coord system.
            box.translate(-np.array(cs_record['translation']))
            box.rotate(Quaternion(cs_record['rotation']).inverse)

        if sensor_record['modality'] == 'camera' and not \
                box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
            continue
        box_list.append(box)

    return data_path, box_list, cam_intrinsic


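# get_predicted_data mirrors get_sample_data, but takes its boxes from `pred_anns`
# (predictions in the global frame) rather than the sample annotations, and pushes
# them through the same global -> ego -> sensor transform chain. Sketch, assuming
# `results` is a list of nuScenes-format detection dicts for this sample:
#
#   pred_boxes = [Box(r['translation'], r['size'], Quaternion(r['rotation']),
#                     name=r['detection_name'], token='predicted') for r in results]
#   _, boxes_cam, cam_intrinsic = get_predicted_data(sd_token, pred_anns=pred_boxes)

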
def lidar_render(sample_token, data, out_path=None):
    bbox_gt_list = []
    bbox_pred_list = []
    anns = nusc.get('sample', sample_token)['anns']
    for ann in anns:
        content = nusc.get('sample_annotation', ann)
        try:
            bbox_gt_list.append(DetectionBox(
                sample_token=content['sample_token'],
                translation=tuple(content['translation']),
                size=tuple(content['size']),
                rotation=tuple(content['rotation']),
                velocity=nusc.box_velocity(content['token'])[:2],
                ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
                else tuple(content['ego_translation']),
                num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
                detection_name=category_to_detection_name(content['category_name']),
                detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
                attribute_name=''))
        except Exception:
            # Skip categories without a detection-class mapping.
            pass

    bbox_anns = data['results'][sample_token]
    for content in bbox_anns:
        bbox_pred_list.append(DetectionBox(
            sample_token=content['sample_token'],
            translation=tuple(content['translation']),
            size=tuple(content['size']),
            rotation=tuple(content['rotation']),
            velocity=tuple(content['velocity']),
            ego_translation=(0.0, 0.0, 0.0) if 'ego_translation' not in content
            else tuple(content['ego_translation']),
            num_pts=-1 if 'num_pts' not in content else int(content['num_pts']),
            detection_name=content['detection_name'],
            detection_score=-1.0 if 'detection_score' not in content else float(content['detection_score']),
            attribute_name=content['attribute_name']))
    gt_annotations = EvalBoxes()
    pred_annotations = EvalBoxes()
    gt_annotations.add_boxes(sample_token, bbox_gt_list)
    pred_annotations.add_boxes(sample_token, bbox_pred_list)
    print('green is ground truth')
    print('blue is the predicted result')
    visualize_sample(nusc, sample_token, gt_annotations, pred_annotations, savepath=out_path + '_bev')


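# lidar_render delegates the actual BEV drawing to nuScenes' visualize_sample, which
# overlays both EvalBoxes sets on the lidar top-down view and writes the figure to
# out_path + '_bev'. Because of that concatenation, out_path must be a string (not
# None) whenever this function is called.

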
def get_color(category_name: str):
    """
    Provides the default colors based on the category names.
    This method works for the general nuScenes categories, as well as the nuScenes detection categories.
    """
    a = ['noise', 'animal', 'human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.construction_worker',
         'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.stroller',
         'human.pedestrian.wheelchair', 'movable_object.barrier', 'movable_object.debris',
         'movable_object.pushable_pullable', 'movable_object.trafficcone', 'static_object.bicycle_rack',
         'vehicle.bicycle', 'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.car', 'vehicle.construction',
         'vehicle.emergency.ambulance', 'vehicle.emergency.police', 'vehicle.motorcycle', 'vehicle.trailer',
         'vehicle.truck', 'flat.driveable_surface', 'flat.other', 'flat.sidewalk', 'flat.terrain', 'static.manmade',
         'static.other', 'static.vegetation', 'vehicle.ego']
    class_names = [
        'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
        'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
    ]
    # print(category_name)
    if category_name == 'bicycle':
        return nusc.colormap['vehicle.bicycle']
    elif category_name == 'construction_vehicle':
        return nusc.colormap['vehicle.construction']
    elif category_name == 'traffic_cone':
        return nusc.colormap['movable_object.trafficcone']

    for key in nusc.colormap.keys():
        if category_name in key:
            return nusc.colormap[key]
    return [0, 0, 0]


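# Example: get_color('car') falls through the special cases and matches the
# substring 'car' in the colormap key 'vehicle.car', so it returns
# nusc.colormap['vehicle.car']. Unknown names fall back to black ([0, 0, 0]).

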
def render_sample_data(
        sample_token: str,
        with_anns: bool = True,
        box_vis_level: BoxVisibility = BoxVisibility.ANY,
        axes_limit: float = 40,
        ax=None,
        nsweeps: int = 1,
        out_path: str = None,
        underlay_map: bool = True,
        use_flat_vehicle_coordinates: bool = True,
        show_lidarseg: bool = False,
        show_lidarseg_legend: bool = False,
        filter_lidarseg_labels=None,
        lidarseg_preds_bin_path: str = None,
        verbose: bool = True,
        show_panoptic: bool = False,
        pred_data=None,
) -> None:
    """
    Render sample data onto axis.
    :param sample_token: Sample token.
    :param with_anns: Whether to draw box annotations.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :param axes_limit: Axes limit for lidar and radar (measured in meters).
    :param ax: Axes onto which to render.
    :param nsweeps: Number of sweeps for lidar and radar.
    :param out_path: Optional path to save the rendered figure to disk.
    :param underlay_map: When set to true, lidar data is plotted onto the map. This can be slow.
    :param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
        aligned to z-plane in the world. Note: Previously this method did not use flat vehicle coordinates, which
        can lead to small errors when the vertical axis of the global frame and lidar are not aligned. The new
        setting is more correct and rotates the plot by ~90 degrees.
    :param show_lidarseg: When set to True, the lidar data is colored with the segmentation labels. When set
        to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
    :param show_lidarseg_legend: Whether to display the legend for the lidarseg labels in the frame.
    :param filter_lidarseg_labels: Only show lidar points which belong to the given list of classes. If None
        or the list is empty, all classes will be displayed.
    :param lidarseg_preds_bin_path: A path to the .bin file which contains the user's lidar segmentation
        predictions for the sample.
    :param verbose: Whether to display the image after it is rendered.
    :param show_panoptic: When set to True, the lidar data is colored with the panoptic labels. When set
        to False, the colors of the lidar data represent the distance from the center of the ego vehicle.
        If show_lidarseg is True, show_panoptic will be set to False.
    :param pred_data: Detection results dict (nuScenes submission format) providing the predicted boxes.
    """
    lidar_render(sample_token, pred_data, out_path=out_path)
    sample = nusc.get('sample', sample_token)
    # sample = data['results'][sample_token_list[0]][0]
    cams = [
        'CAM_FRONT_LEFT',
        'CAM_FRONT',
        'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT',
        'CAM_BACK',
        'CAM_BACK_RIGHT',
    ]
    if ax is None:
        _, ax = plt.subplots(4, 3, figsize=(24, 18))
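    # Layout sketch: a 4x3 grid. Rows 0-1 hold the six camera views with predicted
    # boxes (PRED); rows 2-3 repeat the same views with ground-truth boxes (GT).
    # `j` selects the top row of each pair and is bumped once after the first three
    # cameras; `ind` is folded back into the 0-2 column range.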
    j = 0
    for ind, cam in enumerate(cams):
        sample_data_token = sample['data'][cam]

        sd_record = nusc.get('sample_data', sample_data_token)
        sensor_modality = sd_record['sensor_modality']

        if sensor_modality in ['lidar', 'radar']:
            assert False, 'only camera channels are rendered here'
        elif sensor_modality == 'camera':
            # Load boxes and image.
            boxes = [Box(record['translation'], record['size'], Quaternion(record['rotation']),
                         name=record['detection_name'], token='predicted') for record in
                     pred_data['results'][sample_token] if record['detection_score'] > 0.2]
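            # NOTE: 0.2 is a hard-coded confidence threshold; predictions scoring at
            # or below it are dropped before projection.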

            data_path, boxes_pred, camera_intrinsic = get_predicted_data(sample_data_token,
                                                                         box_vis_level=box_vis_level, pred_anns=boxes)
            _, boxes_gt, _ = nusc.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
            if ind == 3:
                j += 1
            ind = ind % 3
            data = Image.open(data_path)
            # mmcv.imwrite(np.array(data)[:, :, ::-1], f'{cam}.png')

            # Show image.
            ax[j, ind].imshow(data)
            ax[j + 2, ind].imshow(data)

            # Show boxes.
            if with_anns:
                for box in boxes_pred:
                    c = np.array(get_color(box.name)) / 255.0
                    box.render(ax[j, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))
                for box in boxes_gt:
                    c = np.array(get_color(box.name)) / 255.0
                    box.render(ax[j + 2, ind], view=camera_intrinsic, normalize=True, colors=(c, c, c))

            # Limit visible range.
            ax[j, ind].set_xlim(0, data.size[0])
            ax[j, ind].set_ylim(data.size[1], 0)
            ax[j + 2, ind].set_xlim(0, data.size[0])
            ax[j + 2, ind].set_ylim(data.size[1], 0)

        else:
            raise ValueError('Error: Unknown sensor modality!')

        ax[j, ind].axis('off')
        ax[j, ind].set_title('PRED: {} {labels_type}'.format(
            sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
        ax[j, ind].set_aspect('equal')

        ax[j + 2, ind].axis('off')
        ax[j + 2, ind].set_title('GT: {} {labels_type}'.format(
            sd_record['channel'], labels_type='(predictions)' if lidarseg_preds_bin_path else ''))
        ax[j + 2, ind].set_aspect('equal')

    if out_path is not None:
        plt.savefig(out_path + '_camera', bbox_inches='tight', pad_inches=0, dpi=200)
    if verbose:
        plt.show()
    plt.close()
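
# Each call writes two figures per sample: out_path + '_bev' (the lidar top-down
# comparison from lidar_render) and out_path + '_camera' (the 4x3 camera grid above).
# matplotlib appends its default extension (typically .png) when none is given.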

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-trainval', dataroot='./data/nuscenes', verbose=True)
    # render_annotation('7603b030b42a4b1caa8c443ccc1a7d52')
    bevformer_results = mmcv.load('test/bevformer_base/Thu_Jun__9_16_22_37_2022/pts_bbox/results_nusc.json')
    sample_token_list = list(bevformer_results['results'].keys())
    for idx in range(0, 10):
        render_sample_data(sample_token_list[idx], pred_data=bevformer_results, out_path=sample_token_list[idx])
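
# Smoke-test sketch (assumes the v1.0-mini split and a results file are available
# locally; the path below is hypothetical):
#
#   nusc = NuScenes(version='v1.0-mini', dataroot='./data/nuscenes', verbose=True)
#   results = mmcv.load('path/to/results_nusc.json')
#   token = list(results['results'])[0]
#   render_sample_data(token, pred_data=results, out_path=token)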