Commit f3b13cad authored by yeshenglong1's avatar yeshenglong1
Browse files

Update README.md

parent 0797920d
# This schedule is mainly used by models with dynamic voxelization.
# optimizer
lr = 0.003  # max learning rate
optimizer = dict(
    type='AdamW',
    lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
    weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# Cosine-annealing learning rate with a short linear warmup.
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
momentum_config = None
runner = dict(type='EpochBasedRunner', max_epochs=40)
# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 20. Please change the interval accordingly if you do not
# use a default schedule.
# optimizer
# This schedule is mainly used by models on nuScenes dataset.
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# Cyclic (one-cycle) learning rate: ramps up by 10x, then decays to 1e-4x.
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# Momentum cycles inversely to the learning rate.
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
# The schedule is usually used by models trained on KITTI dataset.
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4.
lr = 0.0018
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69  # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327  # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130  # noqa
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# Although the max_epochs is 40, this schedule is usually used with
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be Nx40.
runner = dict(type='EpochBasedRunner', max_epochs=40)
# optimizer # optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
# learning policy # learning policy
lr_config = dict( lr_config = dict(
policy='step', policy='step',
warmup='linear', warmup='linear',
warmup_iters=500, warmup_iters=500,
warmup_ratio=0.001, warmup_ratio=0.001,
step=[8, 11]) step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12) runner = dict(type='EpochBasedRunner', max_epochs=12)
# optimizer # optimizer
# This schedule is mainly used by models on nuScenes dataset # This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND # max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict( lr_config = dict(
policy='step', policy='step',
warmup='linear', warmup='linear',
warmup_iters=1000, warmup_iters=1000,
warmup_ratio=1.0 / 1000, warmup_ratio=1.0 / 1000,
step=[20, 23]) step=[20, 23])
momentum_config = None momentum_config = None
# runtime settings # runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24) runner = dict(type='EpochBasedRunner', max_epochs=24)
# optimizer # optimizer
# This schedule is mainly used by models on indoor dataset, # This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet # e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008 # max learning rate lr = 0.008 # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32]) lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings # runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36) runner = dict(type='EpochBasedRunner', max_epochs=36)
# optimizer # optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task # This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None momentum_config = None
# runtime settings # runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150) runner = dict(type='EpochBasedRunner', max_epochs=150)
# optimizer # optimizer
# This schedule is mainly used on ScanNet dataset in segmentation task # This schedule is mainly used on ScanNet dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None momentum_config = None
# runtime settings # runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200) runner = dict(type='EpochBasedRunner', max_epochs=200)
# optimizer # optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task # This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None) optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None momentum_config = None
# runtime settings # runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50) runner = dict(type='EpochBasedRunner', max_epochs=50)
# VectorMapNet config for Argoverse2: ResNet50-caffe + FPN image backbone,
# IPM BEV encoder, DETR-style map-element detector and polyline generator.
_base_ = [
    './_base_/default_runtime.py'
]

# meta info for submission file
meta = {
    'use_lidar': False,
    'use_camera': True,
    'use_external': False,
    'output_format': 'vector',

    # NOTE: please modify the information below
    'method': 'VectorMapNet',  # name of your method
    'authors': ['Yicheng Liu', 'Tianyuan Yuan', 'Yue Wang',
                'Yilun Wang', 'Hang Zhao'],  # author names
    'e-mail': 'yuantianyuan01@gmail.com',  # your e-mail address
    'institution / company': 'MarsLab, Tsinghua University',  # your organization
    'country / region': 'xxx',  # (IMPORTANT) your country/region in iso3166 standard
}

# model type
type = 'Mapper'
plugin = True

# plugin code dir
plugin_dir = 'src/'

# img configs (caffe-style: BGR, mean-only normalization)
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
img_size = (int(128*2), int((16/9*128)*2))  # (H, W)

# category configs
cat2id = {
    'ped_crossing': 0,
    'divider': 1,
    'boundary': 2,
}
num_class = max(list(cat2id.values())) + 1

# bev configs
roi_size = (60, 30)  # bev range, 60m in x-axis, 30m in y-axis
canvas_size = (200, 100)  # bev feature size

# vectorize params
coords_dim = 2  # polylines coordinates dimension, 2 or 3
sample_dist = -1  # sampling params, vectormapnet uses simplify
sample_num = -1  # sampling params, vectormapnet uses simplify
simplify = True  # sampling params, vectormapnet uses simplify

# model configs
head_dim = 256
norm_cfg = dict(type='BN2d')
num_points = 30

model = dict(
    type='VectorMapNet',
    backbone_cfg=dict(
        type='IPMEncoder',
        img_backbone=dict(
            type='ResNet',
            with_cp=False,
            pretrained='open-mmlab://detectron2/resnet50_caffe',
            depth=50,
            num_stages=4,
            out_indices=(0, 1, 2, 3),
            frozen_stages=-1,
            norm_cfg=norm_cfg,
            norm_eval=True,
            style='caffe',
            dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
            stage_with_dcn=(False, False, True, True)),
        img_neck=dict(
            type='FPN',
            in_channels=[256, 512, 1024, 2048],
            out_channels=128,
            start_level=0,
            add_extra_convs=True,
            # extra_convs_on_inputs=False,  # use P5
            num_outs=4,
            norm_cfg=norm_cfg,
            relu_before_extra_convs=True),
        upsample=dict(
            zoom_size=(1, 2, 4, 8),
            in_channels=128,
            out_channels=128,),
        # BEV grid bounds derived from roi/canvas: [min, max, resolution]
        xbound=[-roi_size[0]/2, roi_size[0]/2, roi_size[0]/canvas_size[0]],
        ybound=[-roi_size[1]/2, roi_size[1]/2, roi_size[1]/canvas_size[1]],
        heights=[-1.1, 0, 0.5, 1.1],
        out_channels=128,
        pretrained=None,
        num_cam=7,
    ),
    head_cfg=dict(
        type='DGHead',
        augmentation=True,
        augmentation_kwargs=dict(
            p=0.3, scale=0.01,
            bbox_type='xyxy',
        ),
        det_net_cfg=dict(
            type='MapElementDetector',
            num_query=120,
            max_lines=35,
            bbox_size=2,
            mode='xyxy',
            canvas_size=canvas_size,
            separate_detect=False,
            discrete_output=False,
            num_classes=num_class,
            in_channels=128,
            score_thre=0.1,
            num_reg_fcs=2,
            num_points=4,
            iterative=False,
            sync_cls_avg_factor=True,
            transformer=dict(
                type='DeformableDetrTransformer_',
                encoder=dict(
                    type='PlaceHolderEncoder',
                    embed_dims=head_dim,
                ),
                decoder=dict(
                    type='DeformableDetrTransformerDecoder_',
                    num_layers=6,
                    return_intermediate=True,
                    transformerlayers=dict(
                        type='DetrTransformerDecoderLayer',
                        attn_cfgs=[
                            dict(
                                type='MultiheadAttention',
                                embed_dims=head_dim,
                                num_heads=8,
                                attn_drop=0.1,
                                proj_drop=0.1,
                                dropout_layer=dict(type='Dropout', drop_prob=0.1),),
                            dict(
                                type='MultiScaleDeformableAttention',
                                embed_dims=head_dim,
                                num_heads=8,
                                num_levels=1,
                            ),
                        ],
                        ffn_cfgs=dict(
                            type='FFN',
                            embed_dims=head_dim,
                            feedforward_channels=head_dim*2,
                            num_fcs=2,
                            ffn_drop=0.1,
                            act_cfg=dict(type='ReLU', inplace=True),
                        ),
                        feedforward_channels=head_dim*2,
                        ffn_dropout=0.1,
                        operation_order=('norm', 'self_attn', 'norm', 'cross_attn',
                                         'norm', 'ffn',)))
            ),
            positional_encoding=dict(
                type='SinePositionalEncoding',
                num_feats=head_dim//2,
                normalize=True,
                offset=-0.5),
            loss_cls=dict(
                type='FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=2.0),
            loss_reg=dict(
                type='LinesLoss',
                loss_weight=0.1),
            train_cfg=dict(
                assigner=dict(
                    type='HungarianLinesAssigner',
                    cost=dict(
                        type='MapQueriesCost',
                        cls_cost=dict(type='FocalLossCost', weight=2.0),
                        reg_cost=dict(type='BBoxCostC', weight=0.1),  # continues
                        iou_cost=dict(type='IoUCostC', weight=1, box_format='xyxy'),  # continues
                    ),
                ),
            ),
        ),
        gen_net_cfg=dict(
            type='PolylineGenerator',
            in_channels=128,
            encoder_config=None,
            decoder_config={
                'layer_config': {
                    'd_model': 256,
                    'nhead': 8,
                    'dim_feedforward': 512,
                    'dropout': 0.2,
                    'norm_first': True,
                    're_zero': True,
                },
                'num_layers': 6,
            },
            class_conditional=True,
            num_classes=num_class,
            canvas_size=canvas_size,  # xy
            max_seq_length=500,
            decoder_cross_attention=False,
            use_discrete_vertex_embeddings=True,
        ),
        max_num_vertices=80,
        top_p_gen_model=0.9,
        sync_cls_avg_factor=True,
    ),
    with_auxiliary_head=False,
    model_name='VectorMapNet'
)

# data processing pipelines
train_pipeline = [
    dict(
        type='VectorizeMap',
        coords_dim=coords_dim,
        roi_size=roi_size,
        simplify=True,
        normalize=True,
    ),
    dict(
        type='PolygonizeLocalMapBbox',
        canvas_size=canvas_size,  # xy
        coord_dim=2,
        num_class=num_class,
        threshold=4/200,
    ),
    dict(type='LoadMultiViewImagesFromFiles'),
    dict(type='ResizeMultiViewImages',
         size=img_size,  # (H, W), same value as the inline literal it replaces
         change_intrinsics=True,
         ),
    dict(type='Normalize3D', **img_norm_cfg),
    dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
    dict(type='FormatBundleMap'),
    dict(type='Collect3D', keys=['img', 'polys', 'vectors'], meta_keys=(
        'token', 'ego2img'))
]

test_pipeline = [
    dict(type='LoadMultiViewImagesFromFiles'),
    dict(type='ResizeMultiViewImages',
         size=img_size,  # H, W
         change_intrinsics=True,
         ),
    dict(type='Normalize3D', **img_norm_cfg),
    dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
    dict(type='FormatBundleMap'),
    dict(type='Collect3D', keys=['img'], meta_keys=(
        'token', 'ego2img'))
]

# dataset configs
data = dict(
    samples_per_gpu=5,
    workers_per_gpu=5,
    train=dict(
        type='AV2Dataset',
        ann_file='./data/train_annotations.json',
        root_path='./data/argoverse2/',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=train_pipeline,
        interval=1,
    ),
    val=dict(
        type='AV2Dataset',
        ann_file='./data/val_annotations.json',
        root_path='./data/argoverse2/',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=test_pipeline,
        test_mode=True,
        interval=1,
    ),
    test=dict(
        type='AV2Dataset',
        ann_file='./data/test_annotations.json',
        root_path='./data/argoverse2/',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=test_pipeline,
        test_mode=True,
        interval=1,
    ),
)

# optimizer: backbone trains at 0.1x the base learning rate
optimizer = dict(
    type='AdamW',
    lr=1e-3,
    paramwise_cfg=dict(
        custom_keys={
            'backbone': dict(lr_mult=0.1),
        }),
    weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=3.5, norm_type=2))

# learning policy & schedule
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=400,
    warmup_ratio=0.1,
    step=[100, 120])
checkpoint_config = dict(interval=5)
total_epochs = 130

# kwargs for dataset evaluation
eval_kwargs = dict()
evaluation = dict(
    interval=5,
    **eval_kwargs)

runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
find_unused_parameters = True
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
_base_ = [ _base_ = [
'./_base_/default_runtime.py' './_base_/default_runtime.py'
] ]
# meta info for submission file # meta info for submission file
meta = { meta = {
'use_lidar': False, 'use_lidar': False,
'use_camera': True, 'use_camera': True,
'use_external': False, 'use_external': False,
'output_format': 'vector', 'output_format': 'vector',
# NOTE: please modify the information below # NOTE: please modify the information below
'method': 'VectorMapNet', # name of your method 'method': 'VectorMapNet', # name of your method
'authors': ['Yicheng Liu', 'Tianyuan Yuan', 'Yue Wang', 'authors': ['Yicheng Liu', 'Tianyuan Yuan', 'Yue Wang',
'Yilun Wang', 'Hang Zhao'], # author names 'Yilun Wang', 'Hang Zhao'], # author names
'e-mail': 'yuantianyuan01@gmail.com', # your e-mail address 'e-mail': 'yuantianyuan01@gmail.com', # your e-mail address
'institution / company': 'MarsLab, Tsinghua University', # your organization 'institution / company': 'MarsLab, Tsinghua University', # your organization
'country / region': 'xxx', # (IMPORTANT) your country/region in iso3166 standard 'country / region': 'xxx', # (IMPORTANT) your country/region in iso3166 standard
} }
# model type # model type
type = 'Mapper' type = 'Mapper'
plugin = True plugin = True
# plugin code dir # plugin code dir
plugin_dir = 'src/' plugin_dir = 'src/'
# img configs # img configs
# img_norm_cfg = dict( # img_norm_cfg = dict(
# mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) # mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
img_norm_cfg = dict( img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_size = (int(128*2), int((16/9*128)*2)) img_size = (int(128*2), int((16/9*128)*2))
# category configs # category configs
cat2id = { cat2id = {
'ped_crossing': 0, 'ped_crossing': 0,
'divider': 1, 'divider': 1,
'boundary': 2, 'boundary': 2,
} }
num_class = max(list(cat2id.values())) + 1 num_class = max(list(cat2id.values())) + 1
# bev configs # bev configs
roi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis roi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis
canvas_size = (200, 100) # bev feature size canvas_size = (200, 100) # bev feature size
# vectorize params # vectorize params
coords_dim = 2 # polylines coordinates dimension, 2 or 3 coords_dim = 2 # polylines coordinates dimension, 2 or 3
sample_dist = -1 # sampling params, vectormapnet uses simplify sample_dist = -1 # sampling params, vectormapnet uses simplify
sample_num = -1 # sampling params, vectormapnet uses simplify sample_num = -1 # sampling params, vectormapnet uses simplify
simplify = True # sampling params, vectormapnet uses simplify simplify = True # sampling params, vectormapnet uses simplify
# model configs # model configs
head_dim = 256 head_dim = 256
norm_cfg = dict(type='BN2d') norm_cfg = dict(type='BN2d')
num_points = 30 num_points = 30
pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict( model = dict(
type='VectorMapNet', type='VectorMapNet',
backbone_cfg=dict( backbone_cfg=dict(
type='IPMEncoder', type='IPMEncoder',
img_backbone=dict( img_backbone=dict(
_delete_=True, _delete_=True,
type='InternImage', type='InternImage',
core_op='DCNv3', core_op='DCNv3',
channels=80, channels=80,
depths=[4, 4, 21, 4], depths=[4, 4, 21, 4],
groups=[5, 10, 20, 40], groups=[5, 10, 20, 40],
mlp_ratio=4., mlp_ratio=4.,
drop_path_rate=0.3, drop_path_rate=0.3,
norm_layer='LN', norm_layer='LN',
layer_scale=1.0, layer_scale=1.0,
offset_scale=1.0, offset_scale=1.0,
post_norm=True, post_norm=True,
with_cp=False, with_cp=False,
init_cfg=dict(type='Pretrained', checkpoint=pretrained)), init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
img_neck=dict( img_neck=dict(
type='FPN', type='FPN',
in_channels=[80, 160, 320, 640], in_channels=[80, 160, 320, 640],
out_channels=128, out_channels=128,
start_level=0, start_level=0,
add_extra_convs=True, add_extra_convs=True,
# extra_convs_on_inputs=False, # use P5 # extra_convs_on_inputs=False, # use P5
num_outs=4, num_outs=4,
norm_cfg=norm_cfg, norm_cfg=norm_cfg,
relu_before_extra_convs=True), relu_before_extra_convs=True),
upsample=dict( upsample=dict(
zoom_size=(1, 2, 4, 8), zoom_size=(1, 2, 4, 8),
in_channels=128, in_channels=128,
out_channels=128,), out_channels=128,),
xbound=[-roi_size[0]/2, roi_size[0]/2, roi_size[0]/canvas_size[0]], xbound=[-roi_size[0]/2, roi_size[0]/2, roi_size[0]/canvas_size[0]],
ybound=[-roi_size[1]/2, roi_size[1]/2, roi_size[1]/canvas_size[1]], ybound=[-roi_size[1]/2, roi_size[1]/2, roi_size[1]/canvas_size[1]],
heights=[-1.1, 0, 0.5, 1.1], heights=[-1.1, 0, 0.5, 1.1],
out_channels=128, out_channels=128,
pretrained=None, pretrained=None,
num_cam=7, num_cam=7,
), ),
head_cfg=dict( head_cfg=dict(
type='DGHead', type='DGHead',
augmentation=True, augmentation=True,
augmentation_kwargs=dict( augmentation_kwargs=dict(
p=0.3,scale=0.01, p=0.3,scale=0.01,
bbox_type='xyxy', bbox_type='xyxy',
), ),
det_net_cfg=dict( det_net_cfg=dict(
type='MapElementDetector', type='MapElementDetector',
num_query=120, num_query=120,
max_lines=35, max_lines=35,
bbox_size=2, bbox_size=2,
mode='xyxy', mode='xyxy',
canvas_size=canvas_size, canvas_size=canvas_size,
separate_detect=False, separate_detect=False,
discrete_output=False, discrete_output=False,
num_classes=num_class, num_classes=num_class,
in_channels=128, in_channels=128,
score_thre=0.1, score_thre=0.1,
num_reg_fcs=2, num_reg_fcs=2,
num_points=4, num_points=4,
iterative=False, iterative=False,
sync_cls_avg_factor=True, sync_cls_avg_factor=True,
transformer=dict( transformer=dict(
type='DeformableDetrTransformer_', type='DeformableDetrTransformer_',
encoder=dict( encoder=dict(
type='PlaceHolderEncoder', type='PlaceHolderEncoder',
embed_dims=head_dim, embed_dims=head_dim,
), ),
decoder=dict( decoder=dict(
type='DeformableDetrTransformerDecoder_', type='DeformableDetrTransformerDecoder_',
num_layers=6, num_layers=6,
return_intermediate=True, return_intermediate=True,
transformerlayers=dict( transformerlayers=dict(
type='DetrTransformerDecoderLayer', type='DetrTransformerDecoderLayer',
attn_cfgs=[ attn_cfgs=[
dict( dict(
type='MultiheadAttention', type='MultiheadAttention',
embed_dims=head_dim, embed_dims=head_dim,
num_heads=8, num_heads=8,
attn_drop=0.1, attn_drop=0.1,
proj_drop=0.1, proj_drop=0.1,
dropout_layer=dict(type='Dropout', drop_prob=0.1),), dropout_layer=dict(type='Dropout', drop_prob=0.1),),
dict( dict(
type='MultiScaleDeformableAttention', type='MultiScaleDeformableAttention',
embed_dims=head_dim, embed_dims=head_dim,
num_heads=8, num_heads=8,
num_levels=1, num_levels=1,
), ),
], ],
ffn_cfgs=dict( ffn_cfgs=dict(
type='FFN', type='FFN',
embed_dims=head_dim, embed_dims=head_dim,
feedforward_channels=head_dim*2, feedforward_channels=head_dim*2,
num_fcs=2, num_fcs=2,
ffn_drop=0.1, ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True), act_cfg=dict(type='ReLU', inplace=True),
), ),
feedforward_channels=head_dim*2, feedforward_channels=head_dim*2,
ffn_dropout=0.1, ffn_dropout=0.1,
operation_order=('norm', 'self_attn', 'norm', 'cross_attn', operation_order=('norm', 'self_attn', 'norm', 'cross_attn',
'norm', 'ffn',))) 'norm', 'ffn',)))
), ),
positional_encoding=dict( positional_encoding=dict(
type='SinePositionalEncoding', type='SinePositionalEncoding',
num_feats=head_dim//2, num_feats=head_dim//2,
normalize=True, normalize=True,
offset=-0.5), offset=-0.5),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
gamma=2.0, gamma=2.0,
alpha=0.25, alpha=0.25,
loss_weight=2.0), loss_weight=2.0),
loss_reg=dict( loss_reg=dict(
type='LinesLoss', type='LinesLoss',
loss_weight=0.1), loss_weight=0.1),
train_cfg=dict( train_cfg=dict(
assigner=dict( assigner=dict(
type='HungarianLinesAssigner', type='HungarianLinesAssigner',
cost=dict( cost=dict(
type='MapQueriesCost', type='MapQueriesCost',
cls_cost=dict(type='FocalLossCost', weight=2.0), cls_cost=dict(type='FocalLossCost', weight=2.0),
reg_cost=dict(type='BBoxCostC', weight=0.1), # continues reg_cost=dict(type='BBoxCostC', weight=0.1), # continues
iou_cost=dict(type='IoUCostC', weight=1,box_format='xyxy'), # continues iou_cost=dict(type='IoUCostC', weight=1,box_format='xyxy'), # continues
), ),
), ),
), ),
), ),
gen_net_cfg=dict( gen_net_cfg=dict(
type='PolylineGenerator', type='PolylineGenerator',
in_channels=128, in_channels=128,
encoder_config=None, encoder_config=None,
decoder_config={ decoder_config={
'layer_config': { 'layer_config': {
'd_model': 256, 'd_model': 256,
'nhead': 8, 'nhead': 8,
'dim_feedforward': 512, 'dim_feedforward': 512,
'dropout': 0.2, 'dropout': 0.2,
'norm_first': True, 'norm_first': True,
're_zero': True, 're_zero': True,
}, },
'num_layers': 6, 'num_layers': 6,
}, },
class_conditional=True, class_conditional=True,
num_classes=num_class, num_classes=num_class,
canvas_size=canvas_size, #xy canvas_size=canvas_size, #xy
max_seq_length=500, max_seq_length=500,
decoder_cross_attention=False, decoder_cross_attention=False,
use_discrete_vertex_embeddings=True, use_discrete_vertex_embeddings=True,
), ),
max_num_vertices=80, max_num_vertices=80,
top_p_gen_model=0.9, top_p_gen_model=0.9,
sync_cls_avg_factor=True, sync_cls_avg_factor=True,
), ),
with_auxiliary_head=False, with_auxiliary_head=False,
model_name='VectorMapNet' model_name='VectorMapNet'
) )
# data processing pipelines
# NOTE: coords_dim, roi_size, canvas_size, num_class, img_norm_cfg and
# img_size are defined earlier in this config file.
train_pipeline = [
    # Convert raw map geometries into vectorized polylines.
    dict(
        type='VectorizeMap',
        coords_dim=coords_dim,
        roi_size=roi_size,
        simplify=True,       # simplify polylines before vectorization
        normalize=True,      # normalize coordinates w.r.t. roi_size
    ),
    # Quantize polylines onto the canvas and build targets for the
    # generative head.
    dict(
        type='PolygonizeLocalMapBbox',
        canvas_size=canvas_size, # xy
        coord_dim=2,
        num_class=num_class,
        threshold=4/200,
    ),
    dict(type='LoadMultiViewImagesFromFiles'),
    # Resize all camera views; camera intrinsics are rescaled to match.
    dict(type='ResizeMultiViewImages',
        size = (int(128*2), int((16/9*128)*2)), # H, W
        change_intrinsics=True,
        ),
    dict(type='Normalize3D', **img_norm_cfg),
    # Pad image H/W to a multiple of 32 (FPN-friendly); intrinsics updated.
    dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
    dict(type='FormatBundleMap'),
    dict(type='Collect3D', keys=['img', 'polys', 'vectors'], meta_keys=(
        'token', 'ego2img'))
]
test_pipeline = [
    dict(type='LoadMultiViewImagesFromFiles'),
    dict(type='ResizeMultiViewImages',
        size=img_size, # H, W
        change_intrinsics=True,
        ),
    dict(type='Normalize3D', **img_norm_cfg),
    dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
    dict(type='FormatBundleMap'),
    # No GT keys at test time: only images and meta info are collected.
    dict(type='Collect3D', keys=['img'], meta_keys=(
        'token', 'ego2img'))
]
# dataset configs
data = dict(
    samples_per_gpu=5,   # batch size per GPU
    workers_per_gpu=5,   # dataloader workers per GPU
    train=dict(
        type='AV2Dataset',
        ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/train_annotations.json',
        root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/train',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=train_pipeline,
        interval=1,      # keep every annotated frame (no subsampling)
    ),
    val=dict(
        type='AV2Dataset',
        ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/val_annotations.json',
        root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/val',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=test_pipeline,
        test_mode=True,  # no GT processing in the pipeline
        interval=1,
    ),
    test=dict(
        type='AV2Dataset',
        ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/test_annotations.json',
        root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/test',
        meta=meta,
        roi_size=roi_size,
        cat2id=cat2id,
        pipeline=test_pipeline,
        test_mode=True,
        interval=1,
    ),
)
# optimizer
optimizer = dict(
    type='AdamW',
    lr=1e-3,
    paramwise_cfg=dict(
        custom_keys={
            # train the (pretrained) image backbone with a 10x smaller lr
            'backbone': dict(lr_mult=0.1),
        }),
    weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=3.5, norm_type=2))
# learning policy & schedule
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=400,    # iterations of linear lr warmup
    warmup_ratio=0.1,    # start warmup at 0.1 * lr
    step=[100, 120])     # epochs at which the lr is decayed
checkpoint_config = dict(interval=5)   # save a checkpoint every 5 epochs
total_epochs = 130
# kwargs for dataset evaluation
eval_kwargs = dict()
evaluation = dict(
    interval=5,          # run evaluation every 5 epochs
    **eval_kwargs)
runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
# some branches may be unused on a given step, so DDP must tolerate
# parameters that receive no gradient
find_unused_parameters = True
log_config = dict(
    interval=50,         # log every 50 iterations
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
from .pipelines import * from .pipelines import *
from .argo_dataset import AV2Dataset from .argo_dataset import AV2Dataset
\ No newline at end of file
from .base_dataset import BaseMapDataset from .base_dataset import BaseMapDataset
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
import numpy as np import numpy as np
from time import time from time import time
import mmcv import mmcv
import os import os
from shapely.geometry import LineString from shapely.geometry import LineString
@DATASETS.register_module()
class AV2Dataset(BaseMapDataset):
    """Argoverse2 map dataset class.

    Args:
        ann_file (str): annotation file path
        cat2id (dict): category to class id
        roi_size (tuple): bev range
        eval_config (Config): evaluation config
        meta (dict): meta information
        pipeline (Config): data processing pipeline config,
        interval (int): annotation load interval
        work_dir (str): path to work dir
        test_mode (bool): whether in test mode
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Load annotations from ann_file.

        Args:
            ann_file (str): Path of the annotation file.

        Returns:
            list[dict]: List of annotations.
        """
        start_time = time()
        sequences = mmcv.load(ann_file)
        # Flatten the per-sequence frame lists into one sample list, then
        # subsample by the configured interval.
        samples = [frame for sequence in sequences.values() for frame in sequence]
        samples = samples[::self.interval]
        print(f'collected {len(samples)} samples in {(time() - start_time):.2f}s')
        self.samples = samples

    def get_sample(self, idx):
        """Get data sample. For each sample, map extractor will be applied to extract
        map elements.

        Args:
            idx (int): data index

        Returns:
            result (dict): dict of input
        """
        sample = self.samples[idx]
        if not self.test_mode:
            # Map each known category id to its list of 3D LineString geometries.
            map_label2geom = {
                self.cat2id[cat]: [LineString(np.array(line)[:, :3]) for line in lines]
                for cat, lines in sample['annotation'].items()
                if cat in self.cat2id
            }
        cams = sample['sensor']
        ego2img_rts = []
        for cam in cams.values():
            extrinsic = np.array(cam['extrinsic'])  # 4x4 ego->cam transform
            intrinsic = np.array(cam['intrinsic'])  # 3x3 camera matrix
            # Pad K to 4x4 so it composes with the 4x4 extrinsic:
            # ego -> image = K_pad @ (ego -> cam)
            viewpad = np.eye(4)
            viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
            ego2img_rts.append(viewpad @ extrinsic)
        pose = sample['pose']
        input_dict = {
            'token': sample['timestamp'],
            'img_filenames': [os.path.join(self.root_path, c['image_path']) for c in cams.values()],
            # intrinsics are 3x3 Ks
            'cam_intrinsics': [c['intrinsic'] for c in cams.values()],
            # extrinsics are 4x4 transform matrices, NOTE: **ego2cam**
            'cam_extrinsics': [c['extrinsic'] for c in cams.values()],
            'ego2img': ego2img_rts,
            'ego2global_translation': pose['ego2global_translation'],
            'ego2global_rotation': pose['ego2global_rotation'],
        }
        if not self.test_mode:
            # {0: List[ped_crossing(LineString)], 1: ...}
            input_dict['map_geoms'] = map_label2geom
        return input_dict
\ No newline at end of file
import numpy as np import numpy as np
import os import os
import os.path as osp import os.path as osp
import mmcv import mmcv
from .evaluation.vector_eval import VectorEvaluate from .evaluation.vector_eval import VectorEvaluate
from mmdet3d.datasets.pipelines import Compose from mmdet3d.datasets.pipelines import Compose
from mmdet.datasets import DATASETS from mmdet.datasets import DATASETS
from torch.utils.data import Dataset from torch.utils.data import Dataset
import warnings import warnings
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
@DATASETS.register_module()
class BaseMapDataset(Dataset):
    """Map dataset base class.

    Subclasses must implement ``load_annotations`` (populating
    ``self.samples``) and ``get_sample``.

    Args:
        ann_file (str): annotation file path
        root_path (str): root directory of the raw data
        cat2id (dict): category to class id
        roi_size (tuple): bev range
        meta (dict): meta information
        pipeline (Config): data processing pipeline config,
        interval (int): annotation load interval
        work_dir (str): path to work dir
        test_mode (bool): whether in test mode
    """

    def __init__(self,
                 ann_file,
                 root_path,
                 cat2id,
                 roi_size,
                 meta,
                 pipeline,
                 interval=1,
                 work_dir=None,
                 test_mode=False,
                 ):
        super().__init__()
        self.ann_file = ann_file
        self.meta = meta
        self.root_path = root_path
        self.classes = list(cat2id.keys())
        self.num_classes = len(self.classes)
        self.cat2id = cat2id
        self.interval = interval

        # Populates self.samples (implemented by subclasses).
        self.load_annotations(self.ann_file)

        # Bidirectional index <-> token maps for sample lookup.
        self.idx2token = {}
        for i, s in enumerate(self.samples):
            if 'timestamp' in s:
                self.idx2token[i] = s['timestamp']
            else:
                self.idx2token[i] = s['token']
        self.token2idx = {v: k for k, v in self.idx2token.items()}

        if pipeline is not None:
            self.pipeline = Compose(pipeline)
        else:
            self.pipeline = None

        # dummy flags to fit with mmdet dataset
        self.flag = np.zeros(len(self), dtype=np.uint8)
        self.roi_size = roi_size
        self.work_dir = work_dir
        self.test_mode = test_mode

    def load_annotations(self, ann_file):
        """Load annotations and populate ``self.samples``; subclass hook."""
        raise NotImplementedError

    def get_sample(self, idx):
        """Build the input dict for sample ``idx``; subclass hook."""
        raise NotImplementedError

    def format_results(self, results, denormalize=True, prefix=None):
        '''Format prediction result to submission format.

        Args:
            results (list[Tensor]): List of prediction results.
            denormalize (bool): whether to denormalize prediction from (0, 1) \
                to bev range. Default: True
            prefix (str): work dir prefix to save submission file.

        Returns:
            dict: Evaluation results
        '''
        submissions = {
            'meta': self.meta,
            'results': {},
        }
        for pred in results:
            # Each formatted case is Dict{'vectors': [], 'scores': [], 'labels': []}:
            #   'vectors': list of arrays [[x1, y1], [x2, y2], ...] — all vectors
            #              predicted in this sample.
            #   'scores':  list of float confidence scores, one per instance.
            #   'labels':  list of int labels, one per instance.
            if pred is None:  # empty prediction
                continue
            single_case = {'vectors': [], 'scores': [], 'labels': []}
            token = pred['token']
            roi_size = np.array(self.roi_size)
            # BEV origin so that the denormalized roi is centered at the ego
            # vehicle.
            origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])
            for score, label, vector in zip(
                    pred['scores'], pred['labels'], pred['vectors']):
                # A line should have >=2 points
                if len(vector) < 2:
                    continue
                if denormalize:
                    # eps presumably matches padding applied when normalizing
                    # targets — TODO confirm against VectorizeMap.
                    eps = 2
                    vector = vector * (roi_size + eps) + origin
                single_case['vectors'].append(vector)
                single_case['scores'].append(score)
                single_case['labels'].append(label)
            submissions['results'][token] = single_case

        out_path = osp.join(prefix, 'submission_vector.json')
        print(f'\nsaving submissions results to {out_path}')
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        mmcv.dump(submissions, out_path)
        return out_path

    def evaluate(self, results, logger=None, **kwargs):
        '''Evaluate prediction result based on `output_format` specified by dataset.

        Args:
            results (list[Tensor]): List of prediction results.
            logger (logger): logger to print evaluation results.

        Returns:
            dict: Evaluation results.
        '''
        # Read (and implicitly validate the presence of) the output format;
        # the value itself is currently unused.
        output_format = self.meta['output_format']
        self.evaluator = VectorEvaluate(self.ann_file)
        print('len of the results', len(results))
        result_path = self.format_results(
            results, denormalize=True, prefix=self.work_dir)
        result_dict = self.evaluator.evaluate(result_path, logger=logger)
        return result_dict

    def __len__(self):
        """Return the length of data infos.

        Returns:
            int: Length of data infos.
        """
        return len(self.samples)

    def _rand_another(self, idx):
        """Randomly get another item.

        Returns:
            int: Another index of item.
        """
        # BUGFIX: np.random.choice was previously given the bound method
        # ``self.__len__`` (not an int), which raises ValueError at runtime.
        return np.random.choice(len(self))

    def __getitem__(self, idx):
        """Get item from infos according to the given index.

        Returns:
            dict: Data dictionary of the corresponding index.
        """
        input_dict = self.get_sample(idx)
        data = self.pipeline(input_dict)
        return data
import numpy as np import numpy as np
from .distance import chamfer_distance, frechet_distance from .distance import chamfer_distance, frechet_distance
from typing import List, Tuple, Union from typing import List, Tuple, Union
from numpy.typing import NDArray from numpy.typing import NDArray
def average_precision(recalls, precisions, mode='area'):
    """Calculate average precision.

    Args:
        recalls (ndarray): shape (num_dets, )
        precisions (ndarray): shape (num_dets, )
        mode (str): 'area' or '11points', 'area' means calculating the area
            under precision-recall curve, '11points' means calculating
            the average precision of recalls at [0, 0.1, ..., 1]

    Returns:
        float: calculated average precision

    Raises:
        ValueError: if ``mode`` is neither 'area' nor '11points'.
    """
    recalls = recalls[np.newaxis, :]
    precisions = precisions[np.newaxis, :]
    assert recalls.shape == precisions.shape and recalls.ndim == 2
    num_scales = recalls.shape[0]
    ap = 0.
    if mode == 'area':
        # Pad the curve so recall spans [0, 1] with precision 0 at both ends.
        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
        ones = np.ones((num_scales, 1), dtype=recalls.dtype)
        mrec = np.hstack((zeros, recalls, ones))
        mpre = np.hstack((zeros, precisions, zeros))
        # Make precision monotonically non-increasing (right-to-left max).
        for i in range(mpre.shape[1] - 1, 0, -1):
            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
        # Integrate precision over segments where recall actually changes.
        ind = np.where(mrec[0, 1:] != mrec[0, :-1])[0]
        ap = np.sum(
            (mrec[0, ind + 1] - mrec[0, ind]) * mpre[0, ind + 1])
    elif mode == '11points':
        for thr in np.arange(0, 1 + 1e-3, 0.1):
            # BUGFIX: the original indexed ``recalls[i, :]`` with a loop
            # variable ``i`` that is undefined in this branch (NameError);
            # row 0 is the only row after the newaxis above.
            precs = precisions[0, recalls[0, :] >= thr]
            prec = precs.max() if precs.size > 0 else 0
            ap += prec
        ap /= 11
    else:
        raise ValueError(
            'Unrecognized mode, only "area" and "11points" are supported')
    return ap
def instance_match(pred_lines: List[NDArray],
                   scores: NDArray,
                   gt_lines: List[NDArray],
                   thresholds: Union[Tuple, List],
                   metric: str='chamfer') -> List:
    """Compute whether detected lines are true positive or false positive.

    Args:
        pred_lines (List): Detected lines of a sample, each line has shape (INTERP_NUM, 2 or 3).
        scores (array): Confidence score of each line, of shape (M, ).
        gt_lines (List): GT lines of a sample, each line has shape (INTERP_NUM, 2 or 3).
        thresholds (list of tuple): List of thresholds.
        metric (str): Distance function for lines matching. Default: 'chamfer'.

    Returns:
        list_of_tp_fp (list): tp-fp matching result at all thresholds

    Raises:
        ValueError: if ``metric`` is not a known distance function.
    """
    if metric == 'chamfer':
        distance_fn = chamfer_distance
    elif metric == 'frechet':
        distance_fn = frechet_distance
    else:
        raise ValueError(f'unknown distance function {metric}')

    num_preds = len(pred_lines)
    num_gts = len(gt_lines)

    # Degenerate cases: with no GT every prediction is a false positive;
    # with no predictions there is nothing to mark.
    if num_gts == 0 or num_preds == 0:
        tp = np.zeros((num_preds), dtype=np.float32)
        fp = np.zeros((num_preds), dtype=np.float32)
        if num_gts == 0:
            fp[...] = 1
        return [(tp.copy(), fp.copy()) for _ in thresholds]

    # Pairwise distances between predictions (rows) and GTs (columns).
    matrix = np.zeros((num_preds, num_gts))
    for row in range(num_preds):
        for col in range(num_gts):
            matrix[row, col] = distance_fn(pred_lines[row], gt_lines[col])
    # Closest GT (and its distance) for every prediction.
    matrix_min = matrix.min(axis=1)
    matrix_argmin = matrix.argmin(axis=1)
    # Visit predictions from highest to lowest confidence.
    sort_inds = np.argsort(-scores)

    # Greedy matching under each threshold.
    tp_fp_list = []
    for thr in thresholds:
        tp = np.zeros((num_preds), dtype=np.float32)
        fp = np.zeros((num_preds), dtype=np.float32)
        gt_covered = np.zeros(num_gts, dtype=bool)
        for det in sort_inds:
            matched_gt = matrix_argmin[det]
            # A detection is TP iff it is close enough to its nearest GT
            # and that GT has not already been claimed by a higher-scoring
            # detection.
            if matrix_min[det] <= thr and not gt_covered[matched_gt]:
                gt_covered[matched_gt] = True
                tp[det] = 1
            else:
                fp[det] = 1
        tp_fp_list.append((tp, fp))
    return tp_fp_list
\ No newline at end of file
from scipy.spatial import distance from scipy.spatial import distance
from numpy.typing import NDArray from numpy.typing import NDArray
def chamfer_distance(line1: NDArray, line2: NDArray) -> float:
    ''' Calculate chamfer distance between two lines. Make sure the
    lines are interpolated.

    Args:
        line1 (array): coordinates of line1
        line2 (array): coordinates of line2

    Returns:
        distance (float): chamfer distance
    '''
    # All pairwise point-to-point euclidean distances, shape (len1, len2).
    pairwise = distance.cdist(line1, line2, 'euclidean')
    # Mean nearest-neighbour distance in each direction.
    forward = pairwise.min(-1).sum() / len(line1)
    backward = pairwise.min(-2).sum() / len(line2)
    # Symmetrized chamfer distance.
    return (forward + backward) / 2
def frechet_distance(line1: NDArray, line2: NDArray) -> float:
    ''' Calculate frechet distance between two lines. Make sure the
    lines are interpolated.

    Currently a placeholder so the 'frechet' metric name resolves; always
    raises.

    Args:
        line1 (array): coordinates of line1
        line2 (array): coordinates of line2

    Returns:
        distance (float): frechet distance

    Raises:
        NotImplementedError: always; frechet distance is not implemented yet.
    '''
    raise NotImplementedError
from functools import partial from functools import partial
import numpy as np import numpy as np
from multiprocessing import Pool from multiprocessing import Pool
from mmdet3d.datasets import build_dataset, build_dataloader from mmdet3d.datasets import build_dataset, build_dataloader
import mmcv import mmcv
from .AP import instance_match, average_precision from .AP import instance_match, average_precision
import prettytable import prettytable
from time import time from time import time
from functools import cached_property from functools import cached_property
from shapely.geometry import LineString from shapely.geometry import LineString
from numpy.typing import NDArray from numpy.typing import NDArray
from typing import Dict, List, Optional from typing import Dict, List, Optional
from logging import Logger from logging import Logger
from mmcv import Config from mmcv import Config
from copy import deepcopy from copy import deepcopy
INTERP_NUM = 100 # number of points to interpolate during evaluation
SAMPLE_DIST = 0.3 # fixed sample distance
THRESHOLDS = [0.5, 1.0, 1.5] # AP thresholds
N_WORKERS = 16 # num workers to parallel
# Category name -> integer label used by the evaluator.
CAT2ID = {
    'ped_crossing': 0,
    'divider': 1,
    'boundary': 2,
}
class VectorEvaluate(object):
    """Evaluator for vectorized map.

    Ground-truth vectors are loaded from ``ann_file`` and indexed by frame
    timestamp; predictions are aligned to the ground truth by the same
    token during :meth:`evaluate`.

    Args:
        ann_file (str): path to the ground-truth annotation file
            (any format ``mmcv.load`` understands); maps segment id to a
            sequence of frames.
        n_workers (int): num workers to parallel
    """
    def __init__(self, ann_file, n_workers: int = N_WORKERS) -> None:
        ann = mmcv.load(ann_file)
        gts = {}
        for seg_id, seq in ann.items():
            for frame in seq:
                # Use a distinct name here: the original rebound `ann`
                # inside this loop, shadowing the dict being iterated.
                frame_ann = {}
                for cat, vectors in frame['annotation'].items():
                    # only evaluate in 2-dimension
                    frame_ann[cat] = [np.array(v)[:, :2] for v in vectors]
                gts[frame['timestamp']] = frame_ann
        self.gts = gts
        self.n_workers = n_workers
        self.cat2id = CAT2ID
        self.id2cat = {v: k for k, v in self.cat2id.items()}

    def interp_fixed_num(self,
                         vector: NDArray,
                         num_pts: int) -> NDArray:
        ''' Interpolate a polyline to a fixed number of points.

        Args:
            vector (array): line coordinates, shape (M, 2)
            num_pts (int): number of points to sample, evenly spaced
                along the line's arc length
        Returns:
            sampled_points (array): interpolated coordinates, shape (num_pts, 2)
        '''
        line = LineString(vector)
        distances = np.linspace(0, line.length, num_pts)
        sampled_points = np.array([list(line.interpolate(distance).coords)
                                   for distance in distances]).squeeze()
        return sampled_points

    def interp_fixed_dist(self,
                          vector: NDArray,
                          sample_dist: float) -> NDArray:
        ''' Interpolate a line at fixed interval.

        Args:
            vector (array): line coordinates, shape (M, 2)
            sample_dist (float): sample interval
        Returns:
            points (array): interpolated points, shape (N, 2)
        '''
        line = LineString(vector)
        distances = list(np.arange(sample_dist, line.length, sample_dist))
        # make sure to sample at least two points when sample_dist > line.length
        distances = [0,] + distances + [line.length,]
        sampled_points = np.array([list(line.interpolate(distance).coords)
                                   for distance in distances]).squeeze()
        return sampled_points

    def _evaluate_single(self,
                         pred_vectors: List,
                         scores: List,
                         groundtruth: List,
                         thresholds: List,
                         metric: str = 'metric') -> Dict[int, NDArray]:
        ''' Do single-frame matching for one class.

        Args:
            pred_vectors (List): List[vector(ndarray) (different length)],
            scores (List): List[score(float)]
            groundtruth (List): List of vectors
            thresholds (List): List of thresholds
            metric (str): distance metric passed to ``instance_match``
        Returns:
            tp_fp_score_by_thr (Dict): matching results at different thresholds
                e.g. {0.5: (M, 3), 1.0: (M, 3), 1.5: (M, 3)} with columns
                (tp, fp, score) per prediction
        '''
        # Resample predictions and GT to a common fixed spacing so that
        # the distance metric compares like with like.
        pred_lines = []
        for vector in pred_vectors:
            vector = np.array(vector)
            pred_lines.append(self.interp_fixed_dist(vector, SAMPLE_DIST))
        gt_lines = []
        for vector in groundtruth:
            gt_lines.append(self.interp_fixed_dist(vector, SAMPLE_DIST))
        scores = np.array(scores)
        # instance_match returns one (tp, fp) pair per threshold.
        tp_fp_list = instance_match(pred_lines, scores, gt_lines, thresholds, metric)
        tp_fp_score_by_thr = {}
        for i, thr in enumerate(thresholds):
            tp, fp = tp_fp_list[i]
            tp_fp_score = np.hstack([tp[:, None], fp[:, None], scores[:, None]])
            tp_fp_score_by_thr[thr] = tp_fp_score
        return tp_fp_score_by_thr

    def evaluate(self,
                 result_path: str,
                 metric: str = 'chamfer',
                 logger: Optional[Logger] = None) -> Dict[str, float]:
        ''' Do evaluation for a submission file and print evaluation results to `logger` if specified.

        The submission will be aligned by tokens before evaluation. We use multi-worker to speed up.

        Args:
            result_path (str): path to submission file
            metric (str): distance metric. Default: 'chamfer'
            logger (Logger): logger to print evaluation result, Default: None
        Returns:
            new_result_dict (Dict): evaluation results. AP by categories.
        '''
        results = mmcv.load(result_path)
        results = results['results']

        # re-group samples and gt by label
        samples_by_cls = {label: [] for label in self.id2cat.keys()}
        num_gts = {label: 0 for label in self.id2cat.keys()}
        num_preds = {label: 0 for label in self.id2cat.keys()}

        # align by token; frames missing from the submission count as
        # empty predictions (their GT still contributes to num_gts).
        for token, gt in self.gts.items():
            if token in results.keys():
                pred = results[token]
            else:
                pred = {'vectors': [], 'scores': [], 'labels': []}

            vectors_by_cls = {label: [] for label in self.id2cat.keys()}
            scores_by_cls = {label: [] for label in self.id2cat.keys()}
            for i in range(len(pred['labels'])):
                # i-th pred line in sample
                label = pred['labels'][i]
                vectors_by_cls[label].append(pred['vectors'][i])
                scores_by_cls[label].append(pred['scores'][i])

            for label, cat in self.id2cat.items():
                new_sample = (vectors_by_cls[label], scores_by_cls[label], gt[cat])
                num_gts[label] += len(gt[cat])
                num_preds[label] += len(scores_by_cls[label])
                samples_by_cls[label].append(new_sample)

        result_dict = {}
        print(f'\nevaluating {len(self.id2cat)} categories...')
        start = time()
        # Create the worker pool once and always release it, even if the
        # matching below raises (the original leaked it on exception).
        pool = Pool(self.n_workers) if self.n_workers > 0 else None
        sum_mAP = 0
        pbar = mmcv.ProgressBar(len(self.id2cat))
        try:
            for label in self.id2cat.keys():
                samples = samples_by_cls[label]  # List[(pred_lines, scores, gts)]
                result_dict[self.id2cat[label]] = {
                    'num_gts': num_gts[label],
                    'num_preds': num_preds[label]
                }
                sum_AP = 0
                fn = partial(self._evaluate_single, thresholds=THRESHOLDS, metric=metric)
                if pool is not None:
                    tpfp_score_list = pool.starmap(fn, samples)
                else:
                    tpfp_score_list = [fn(*sample) for sample in samples]

                for thr in THRESHOLDS:
                    tp_fp_score = [i[thr] for i in tpfp_score_list]
                    tp_fp_score = np.vstack(tp_fp_score)  # (num_dets, 3)
                    # Sort detections by descending confidence before
                    # accumulating the PR curve.
                    sort_inds = np.argsort(-tp_fp_score[:, -1])
                    tp = tp_fp_score[sort_inds, 0]  # (num_dets,)
                    fp = tp_fp_score[sort_inds, 1]  # (num_dets,)
                    tp = np.cumsum(tp, axis=0)
                    fp = np.cumsum(fp, axis=0)
                    eps = np.finfo(np.float32).eps
                    recalls = tp / np.maximum(num_gts[label], eps)
                    precisions = tp / np.maximum((tp + fp), eps)
                    AP = average_precision(recalls, precisions, 'area')
                    sum_AP += AP
                    result_dict[self.id2cat[label]].update({f'AP@{thr}': AP})

                pbar.update()
                AP = sum_AP / len(THRESHOLDS)
                sum_mAP += AP
                # NOTE: fixed a no-placeholder f-string here ({f'AP': ...}).
                result_dict[self.id2cat[label]].update({'AP': AP})
        finally:
            if pool is not None:
                pool.close()
                pool.join()

        mAP = sum_mAP / len(self.id2cat.keys())
        result_dict.update({'mAP': mAP})
        print(f"finished in {time() - start:.2f}s")

        # print results
        table = prettytable.PrettyTable(['category', 'num_preds', 'num_gts'] +
                                        [f'AP@{thr}' for thr in THRESHOLDS] + ['AP'])
        for label in self.id2cat.keys():
            table.add_row([
                self.id2cat[label],
                result_dict[self.id2cat[label]]['num_preds'],
                result_dict[self.id2cat[label]]['num_gts'],
                *[round(result_dict[self.id2cat[label]][f'AP@{thr}'], 4) for thr in THRESHOLDS],
                round(result_dict[self.id2cat[label]]['AP'], 4),
            ])
        from mmcv.utils import print_log
        print_log('\n' + str(table), logger=logger)
        print_log(f'mAP = {mAP:.4f}\n', logger=logger)

        # Callers get only the per-category AP values (mAP is printed,
        # not returned), matching the original contract.
        new_result_dict = {}
        for name in self.cat2id:
            new_result_dict[name] = result_dict[name]['AP']
        return new_result_dict
\ No newline at end of file
from .loading import LoadMultiViewImagesFromFiles from .loading import LoadMultiViewImagesFromFiles
from .formating import FormatBundleMap from .formating import FormatBundleMap
from .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D from .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D
from .vectorize import VectorizeMap from .vectorize import VectorizeMap
from .poly_bbox import PolygonizeLocalMapBbox from .poly_bbox import PolygonizeLocalMapBbox
# for argoverse
# Public API of this pipelines package: names exported via
# `from <package> import *` and visible to the mmdet PIPELINES registry users.
__all__ = [
    'LoadMultiViewImagesFromFiles',
    'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages',
    'VectorizeMap', 'PolygonizeLocalMapBbox'
]
\ No newline at end of file
import numpy as np import numpy as np
from mmcv.parallel import DataContainer as DC from mmcv.parallel import DataContainer as DC
from mmdet3d.core.points import BasePoints from mmdet3d.core.points import BasePoints
from mmdet.datasets.builder import PIPELINES from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor from mmdet.datasets.pipelines import to_tensor
@PIPELINES.register_module()
class FormatBundleMap(object):
    """Format data for map tasks and then collect data for model input.

    These fields are formatted as follows.

    - img: (1) transpose, (2) to tensor, (3) to DataContainer (stack=True)
    - semantic_mask (if exists): (1) to tensor, (2) to DataContainer (stack=True)
    - vectors (if exists): (1) to DataContainer (cpu_only=True)
    - img_metas: (1) to DataContainer (cpu_only=True)

    Args:
        process_img (bool): whether to format the 'img' field.
        keys (list[str] | None): fields to collect; defaults to
            ['img', 'semantic_mask', 'vectors'].
        meta_keys (list[str] | None): meta fields to collect; defaults to
            ['intrinsics', 'extrinsics'].
    """

    def __init__(self, process_img=True, keys=None, meta_keys=None):
        self.process_img = process_img
        # Avoid mutable default arguments (shared list objects across
        # instances); build a fresh list when the caller passes nothing.
        self.keys = ['img', 'semantic_mask', 'vectors'] if keys is None else keys
        self.meta_keys = ['intrinsics', 'extrinsics'] if meta_keys is None else meta_keys

    def __call__(self, results):
        """Call function to transform and format common fields in results.

        Args:
            results (dict): Result dict contains the data to convert.

        Returns:
            dict: The result dict contains the data that is formatted with
                default bundle.
        """
        # Format 3D data
        if 'points' in results:
            assert isinstance(results['points'], BasePoints)
            results['points'] = DC(results['points'].tensor)

        for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:
            if key not in results:
                continue
            results[key] = DC(to_tensor(results[key]), stack=False)

        if 'img' in results and self.process_img:
            if isinstance(results['img'], list):
                # process multiple imgs in single frame
                imgs = [img.transpose(2, 0, 1) for img in results['img']]
                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
                results['img'] = DC(to_tensor(imgs), stack=True)
            else:
                img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
                results['img'] = DC(to_tensor(img), stack=True)

        if 'semantic_mask' in results:
            results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True)

        if 'vectors' in results:
            # vectors may have different sizes
            results['vectors'] = DC(results['vectors'], stack=False, cpu_only=True)

        if 'polys' in results:
            results['polys'] = DC(results['polys'], stack=False, cpu_only=True)

        return results

    def __repr__(self):
        """str: Return a string that describes the module."""
        # Include every configurable attribute and close the parenthesis
        # (the original repr was truncated after `process_img`).
        repr_str = self.__class__.__name__
        repr_str += (f'(process_img={self.process_img}, '
                     f'keys={self.keys}, meta_keys={self.meta_keys})')
        return repr_str
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment