"vscode:/vscode.git/clone" did not exist on "20a45b60a458e1684671c7d836d8f75854d62727"
Commit f3b13cad authored by yeshenglong1

Update README.md

parent 0797920d
# This schedule is mainly used by models with dynamic voxelization
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
min_lr_ratio=1e-5)
momentum_config = None
runner = dict(type='EpochBasedRunner', max_epochs=40)
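# --- illustrative sketch, not part of the original config ---
# The warmup settings above start the LR at lr * warmup_ratio and ramp it
# linearly to lr over warmup_iters iterations. The helper below follows
# mmcv's linear warmup formula and is only a sketch for intuition.
def _linear_warmup_lr(base_lr, cur_iter, warmup_iters=1000, warmup_ratio=1.0 / 10):
    # mmcv linear warmup: k = (1 - t / warmup_iters) * (1 - warmup_ratio)
    k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
    return base_lr * (1 - k)

assert abs(_linear_warmup_lr(0.003, 0) - 0.0003) < 1e-12    # first iter: lr / 10
assert abs(_linear_warmup_lr(0.003, 1000) - 0.003) < 1e-12  # warmup done: full lr
# --- end sketch ---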
# For the nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained for 20 epochs by default, we set the evaluation
# interval to 20. Please change the interval accordingly if you do not
# use the default schedule.
# optimizer
# This schedule is mainly used by models on the nuScenes dataset
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
# The schedule is usually used by models trained on KITTI dataset
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4
lr = 0.0018
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
# Although the max_epochs is 40, this schedule is usually used with
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be Nx40
runner = dict(type='EpochBasedRunner', max_epochs=40)
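# --- illustrative sketch, not part of the original config ---
# Key points of the cyclic schedule above, assuming mmcv's CyclicLrUpdater
# scales the base lr by target_ratio[0] at the top of the cycle and by
# target_ratio[1] at the end; step_ratio_up=0.4 puts the peak 40% into training.
_peak_lr = lr * 10     # 0.018, reached 40% into training
_final_lr = lr * 1e-4  # 1.8e-07, reached at the end of training
# --- end sketch ---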
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
# optimizer
# This schedule is mainly used by models on the nuScenes dataset
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
# optimizer
# This schedule is mainly used by models on indoor datasets,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008 # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150)
# optimizer
# This schedule is mainly used on the ScanNet dataset for the segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50)
_base_ = [
'./_base_/default_runtime.py'
]
# meta info for submission file
meta = {
'use_lidar': False,
'use_camera': True,
'use_external': False,
'output_format': 'vector',
# NOTE: please modify the information below
'method': 'VectorMapNet', # name of your method
'authors': ['Yicheng Liu', 'Tianyuan Yuan', 'Yue Wang',
'Yilun Wang', 'Hang Zhao'], # author names
'e-mail': 'yuantianyuan01@gmail.com', # your e-mail address
'institution / company': 'MarsLab, Tsinghua University', # your organization
    'country / region': 'xxx', # (IMPORTANT) your country/region in the ISO 3166 standard
}
# model type
type = 'Mapper'
plugin = True
# plugin code dir
plugin_dir = 'src/'
# img configs
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
img_size = (int(128*2), int((16/9*128)*2))
# category configs
cat2id = {
'ped_crossing': 0,
'divider': 1,
'boundary': 2,
}
num_class = max(list(cat2id.values())) + 1
# bev configs
roi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis
canvas_size = (200, 100) # bev feature size
# vectorize params
coords_dim = 2 # polylines coordinates dimension, 2 or 3
sample_dist = -1 # sampling params, vectormapnet uses simplify
sample_num = -1 # sampling params, vectormapnet uses simplify
simplify = True # sampling params, vectormapnet uses simplify
# model configs
head_dim = 256
norm_cfg = dict(type='BN2d')
num_points = 30
model = dict(
type='VectorMapNet',
backbone_cfg=dict(
type='IPMEncoder',
img_backbone=dict(
type='ResNet',
with_cp=False,
pretrained='open-mmlab://detectron2/resnet50_caffe',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe',
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, False, True, True)),
img_neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=128,
start_level=0,
add_extra_convs=True,
# extra_convs_on_inputs=False, # use P5
num_outs=4,
norm_cfg=norm_cfg,
relu_before_extra_convs=True),
upsample=dict(
zoom_size=(1, 2, 4, 8),
in_channels=128,
out_channels=128,),
xbound=[-roi_size[0]/2, roi_size[0]/2, roi_size[0]/canvas_size[0]],
ybound=[-roi_size[1]/2, roi_size[1]/2, roi_size[1]/canvas_size[1]],
heights=[-1.1, 0, 0.5, 1.1],
out_channels=128,
pretrained=None,
num_cam=7,
),
head_cfg=dict(
type='DGHead',
augmentation=True,
augmentation_kwargs=dict(
            p=0.3, scale=0.01,
bbox_type='xyxy',
),
det_net_cfg=dict(
type='MapElementDetector',
num_query=120,
max_lines=35,
bbox_size=2,
mode='xyxy',
canvas_size=canvas_size,
separate_detect=False,
discrete_output=False,
num_classes=num_class,
in_channels=128,
score_thre=0.1,
num_reg_fcs=2,
num_points=4,
iterative=False,
sync_cls_avg_factor=True,
transformer=dict(
type='DeformableDetrTransformer_',
encoder=dict(
type='PlaceHolderEncoder',
embed_dims=head_dim,
),
decoder=dict(
type='DeformableDetrTransformerDecoder_',
num_layers=6,
return_intermediate=True,
transformerlayers=dict(
type='DetrTransformerDecoderLayer',
attn_cfgs=[
dict(
type='MultiheadAttention',
embed_dims=head_dim,
num_heads=8,
attn_drop=0.1,
proj_drop=0.1,
dropout_layer=dict(type='Dropout', drop_prob=0.1),),
dict(
type='MultiScaleDeformableAttention',
embed_dims=head_dim,
num_heads=8,
num_levels=1,
),
],
ffn_cfgs=dict(
type='FFN',
embed_dims=head_dim,
feedforward_channels=head_dim*2,
num_fcs=2,
ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True),
),
feedforward_channels=head_dim*2,
ffn_dropout=0.1,
operation_order=('norm', 'self_attn', 'norm', 'cross_attn',
'norm', 'ffn',)))
),
positional_encoding=dict(
type='SinePositionalEncoding',
num_feats=head_dim//2,
normalize=True,
offset=-0.5),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_reg=dict(
type='LinesLoss',
loss_weight=0.1),
train_cfg=dict(
assigner=dict(
type='HungarianLinesAssigner',
cost=dict(
type='MapQueriesCost',
cls_cost=dict(type='FocalLossCost', weight=2.0),
                        reg_cost=dict(type='BBoxCostC', weight=0.1), # continuous
                        iou_cost=dict(type='IoUCostC', weight=1, box_format='xyxy'), # continuous
),
),
),
),
gen_net_cfg=dict(
type='PolylineGenerator',
in_channels=128,
encoder_config=None,
decoder_config={
'layer_config': {
'd_model': 256,
'nhead': 8,
'dim_feedforward': 512,
'dropout': 0.2,
'norm_first': True,
're_zero': True,
},
'num_layers': 6,
},
class_conditional=True,
num_classes=num_class,
canvas_size=canvas_size, #xy
max_seq_length=500,
decoder_cross_attention=False,
use_discrete_vertex_embeddings=True,
),
max_num_vertices=80,
top_p_gen_model=0.9,
sync_cls_avg_factor=True,
),
with_auxiliary_head=False,
model_name='VectorMapNet'
)
# data processing pipelines
train_pipeline = [
dict(
type='VectorizeMap',
coords_dim=coords_dim,
roi_size=roi_size,
simplify=True,
normalize=True,
),
dict(
type='PolygonizeLocalMapBbox',
canvas_size=canvas_size, # xy
coord_dim=2,
num_class=num_class,
threshold=4/200,
),
dict(type='LoadMultiViewImagesFromFiles'),
dict(type='ResizeMultiViewImages',
        size=img_size, # H, W
change_intrinsics=True,
),
dict(type='Normalize3D', **img_norm_cfg),
dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
dict(type='FormatBundleMap'),
dict(type='Collect3D', keys=['img', 'polys', 'vectors'], meta_keys=(
'token', 'ego2img'))
]
test_pipeline = [
dict(type='LoadMultiViewImagesFromFiles'),
dict(type='ResizeMultiViewImages',
size=img_size, # H, W
change_intrinsics=True,
),
dict(type='Normalize3D', **img_norm_cfg),
dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
dict(type='FormatBundleMap'),
dict(type='Collect3D', keys=['img'], meta_keys=(
'token', 'ego2img'))
]
# dataset configs
data = dict(
samples_per_gpu=5,
workers_per_gpu=5,
train=dict(
type='AV2Dataset',
ann_file='./data/train_annotations.json',
root_path='./data/argoverse2/',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=train_pipeline,
interval=1,
),
val=dict(
type='AV2Dataset',
ann_file='./data/val_annotations.json',
root_path='./data/argoverse2/',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=test_pipeline,
test_mode=True,
interval=1,
),
test=dict(
type='AV2Dataset',
ann_file='./data/test_annotations.json',
root_path='./data/argoverse2/',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=test_pipeline,
test_mode=True,
interval=1,
),
)
# optimizer
optimizer = dict(
type='AdamW',
lr=1e-3,
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1),
}),
weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=3.5, norm_type=2))
# learning policy & schedule
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=400,
warmup_ratio=0.1,
step=[100, 120])
checkpoint_config = dict(interval=5)
total_epochs = 130
# kwargs for dataset evaluation
eval_kwargs = dict()
evaluation = dict(
interval=5,
**eval_kwargs)
runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
find_unused_parameters = True
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
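# --- illustrative sketch, not part of the original config ---
# Sanity check for the BEV geometry above: xbound/ybound are
# [min, max, cell size], so roi_size / canvas_size is the metric size of one
# BEV cell (0.3 m in both axes here).
_roi_size = (60, 30)
_canvas_size = (200, 100)
_xbound = [-_roi_size[0] / 2, _roi_size[0] / 2, _roi_size[0] / _canvas_size[0]]
_ybound = [-_roi_size[1] / 2, _roi_size[1] / 2, _roi_size[1] / _canvas_size[1]]
assert _xbound == [-30.0, 30.0, 0.3]  # 60 m across 200 cells -> 0.3 m/cell
assert _ybound == [-15.0, 15.0, 0.3]  # 30 m across 100 cells -> 0.3 m/cell
# --- end sketch ---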
_base_ = [
'./_base_/default_runtime.py'
]
# meta info for submission file
meta = {
'use_lidar': False,
'use_camera': True,
'use_external': False,
'output_format': 'vector',
# NOTE: please modify the information below
'method': 'VectorMapNet', # name of your method
'authors': ['Yicheng Liu', 'Tianyuan Yuan', 'Yue Wang',
'Yilun Wang', 'Hang Zhao'], # author names
'e-mail': 'yuantianyuan01@gmail.com', # your e-mail address
'institution / company': 'MarsLab, Tsinghua University', # your organization
    'country / region': 'xxx', # (IMPORTANT) your country/region in the ISO 3166 standard
}
# model type
type = 'Mapper'
plugin = True
# plugin code dir
plugin_dir = 'src/'
# img configs
# img_norm_cfg = dict(
# mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_size = (int(128*2), int((16/9*128)*2))
# category configs
cat2id = {
'ped_crossing': 0,
'divider': 1,
'boundary': 2,
}
num_class = max(list(cat2id.values())) + 1
# bev configs
roi_size = (60, 30) # bev range, 60m in x-axis, 30m in y-axis
canvas_size = (200, 100) # bev feature size
# vectorize params
coords_dim = 2 # polylines coordinates dimension, 2 or 3
sample_dist = -1 # sampling params, vectormapnet uses simplify
sample_num = -1 # sampling params, vectormapnet uses simplify
simplify = True # sampling params, vectormapnet uses simplify
# model configs
head_dim = 256
norm_cfg = dict(type='BN2d')
num_points = 30
pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict(
type='VectorMapNet',
backbone_cfg=dict(
type='IPMEncoder',
img_backbone=dict(
_delete_=True,
type='InternImage',
core_op='DCNv3',
channels=80,
depths=[4, 4, 21, 4],
groups=[5, 10, 20, 40],
mlp_ratio=4.,
drop_path_rate=0.3,
norm_layer='LN',
layer_scale=1.0,
offset_scale=1.0,
post_norm=True,
with_cp=False,
init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
img_neck=dict(
type='FPN',
in_channels=[80, 160, 320, 640],
out_channels=128,
start_level=0,
add_extra_convs=True,
# extra_convs_on_inputs=False, # use P5
num_outs=4,
norm_cfg=norm_cfg,
relu_before_extra_convs=True),
upsample=dict(
zoom_size=(1, 2, 4, 8),
in_channels=128,
out_channels=128,),
xbound=[-roi_size[0]/2, roi_size[0]/2, roi_size[0]/canvas_size[0]],
ybound=[-roi_size[1]/2, roi_size[1]/2, roi_size[1]/canvas_size[1]],
heights=[-1.1, 0, 0.5, 1.1],
out_channels=128,
pretrained=None,
num_cam=7,
),
head_cfg=dict(
type='DGHead',
augmentation=True,
augmentation_kwargs=dict(
            p=0.3, scale=0.01,
bbox_type='xyxy',
),
det_net_cfg=dict(
type='MapElementDetector',
num_query=120,
max_lines=35,
bbox_size=2,
mode='xyxy',
canvas_size=canvas_size,
separate_detect=False,
discrete_output=False,
num_classes=num_class,
in_channels=128,
score_thre=0.1,
num_reg_fcs=2,
num_points=4,
iterative=False,
sync_cls_avg_factor=True,
transformer=dict(
type='DeformableDetrTransformer_',
encoder=dict(
type='PlaceHolderEncoder',
embed_dims=head_dim,
),
decoder=dict(
type='DeformableDetrTransformerDecoder_',
num_layers=6,
return_intermediate=True,
transformerlayers=dict(
type='DetrTransformerDecoderLayer',
attn_cfgs=[
dict(
type='MultiheadAttention',
embed_dims=head_dim,
num_heads=8,
attn_drop=0.1,
proj_drop=0.1,
dropout_layer=dict(type='Dropout', drop_prob=0.1),),
dict(
type='MultiScaleDeformableAttention',
embed_dims=head_dim,
num_heads=8,
num_levels=1,
),
],
ffn_cfgs=dict(
type='FFN',
embed_dims=head_dim,
feedforward_channels=head_dim*2,
num_fcs=2,
ffn_drop=0.1,
act_cfg=dict(type='ReLU', inplace=True),
),
feedforward_channels=head_dim*2,
ffn_dropout=0.1,
operation_order=('norm', 'self_attn', 'norm', 'cross_attn',
'norm', 'ffn',)))
),
positional_encoding=dict(
type='SinePositionalEncoding',
num_feats=head_dim//2,
normalize=True,
offset=-0.5),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_reg=dict(
type='LinesLoss',
loss_weight=0.1),
train_cfg=dict(
assigner=dict(
type='HungarianLinesAssigner',
cost=dict(
type='MapQueriesCost',
cls_cost=dict(type='FocalLossCost', weight=2.0),
                        reg_cost=dict(type='BBoxCostC', weight=0.1), # continuous
                        iou_cost=dict(type='IoUCostC', weight=1, box_format='xyxy'), # continuous
),
),
),
),
gen_net_cfg=dict(
type='PolylineGenerator',
in_channels=128,
encoder_config=None,
decoder_config={
'layer_config': {
'd_model': 256,
'nhead': 8,
'dim_feedforward': 512,
'dropout': 0.2,
'norm_first': True,
're_zero': True,
},
'num_layers': 6,
},
class_conditional=True,
num_classes=num_class,
canvas_size=canvas_size, #xy
max_seq_length=500,
decoder_cross_attention=False,
use_discrete_vertex_embeddings=True,
),
max_num_vertices=80,
top_p_gen_model=0.9,
sync_cls_avg_factor=True,
),
with_auxiliary_head=False,
model_name='VectorMapNet'
)
# data processing pipelines
train_pipeline = [
dict(
type='VectorizeMap',
coords_dim=coords_dim,
roi_size=roi_size,
simplify=True,
normalize=True,
),
dict(
type='PolygonizeLocalMapBbox',
canvas_size=canvas_size, # xy
coord_dim=2,
num_class=num_class,
threshold=4/200,
),
dict(type='LoadMultiViewImagesFromFiles'),
dict(type='ResizeMultiViewImages',
        size=img_size, # H, W
change_intrinsics=True,
),
dict(type='Normalize3D', **img_norm_cfg),
dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
dict(type='FormatBundleMap'),
dict(type='Collect3D', keys=['img', 'polys', 'vectors'], meta_keys=(
'token', 'ego2img'))
]
test_pipeline = [
dict(type='LoadMultiViewImagesFromFiles'),
dict(type='ResizeMultiViewImages',
size=img_size, # H, W
change_intrinsics=True,
),
dict(type='Normalize3D', **img_norm_cfg),
dict(type='PadMultiViewImages', size_divisor=32, change_intrinsics=True),
dict(type='FormatBundleMap'),
dict(type='Collect3D', keys=['img'], meta_keys=(
'token', 'ego2img'))
]
# dataset configs
data = dict(
samples_per_gpu=5,
workers_per_gpu=5,
train=dict(
type='AV2Dataset',
ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/train_annotations.json',
root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/train',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=train_pipeline,
interval=1,
),
val=dict(
type='AV2Dataset',
ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/val_annotations.json',
root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/val',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=test_pipeline,
test_mode=True,
interval=1,
),
test=dict(
type='AV2Dataset',
ann_file='/mnt/petrelfs/yeshenglong/Dataset/track_data/test_annotations.json',
root_path='/mnt/petrelfs/yeshenglong/Dataset/track_data/test',
meta=meta,
roi_size=roi_size,
cat2id=cat2id,
pipeline=test_pipeline,
test_mode=True,
interval=1,
),
)
# optimizer
optimizer = dict(
type='AdamW',
lr=1e-3,
paramwise_cfg=dict(
custom_keys={
'backbone': dict(lr_mult=0.1),
}),
weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=3.5, norm_type=2))
# learning policy & schedule
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=400,
warmup_ratio=0.1,
step=[100, 120])
checkpoint_config = dict(interval=5)
total_epochs = 130
# kwargs for dataset evaluation
eval_kwargs = dict()
evaluation = dict(
interval=5,
**eval_kwargs)
runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
find_unused_parameters = True
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
from .base_dataset import BaseMapDataset
from mmdet.datasets import DATASETS
import numpy as np
from time import time
import mmcv
import os
from shapely.geometry import LineString
@DATASETS.register_module()
class AV2Dataset(BaseMapDataset):
"""Argoverse2 map dataset class.
Args:
ann_file (str): annotation file path
cat2id (dict): category to class id
roi_size (tuple): bev range
eval_config (Config): evaluation config
meta (dict): meta information
pipeline (Config): data processing pipeline config,
interval (int): annotation load interval
work_dir (str): path to work dir
test_mode (bool): whether in test mode
"""
    def __init__(self, **kwargs):
super().__init__(**kwargs)
def load_annotations(self, ann_file):
"""Load annotations from ann_file.
Args:
ann_file (str): Path of the annotation file.
Returns:
list[dict]: List of annotations.
"""
start_time = time()
ann = mmcv.load(ann_file)
samples = []
for seg_id, sequence in ann.items():
samples.extend(sequence)
samples = samples[::self.interval]
print(f'collected {len(samples)} samples in {(time() - start_time):.2f}s')
self.samples = samples
def get_sample(self, idx):
"""Get data sample. For each sample, map extractor will be applied to extract
map elements.
Args:
idx (int): data index
Returns:
result (dict): dict of input
"""
sample = self.samples[idx]
if not self.test_mode:
ann = sample['annotation']
            # collect required keys
map_label2geom = {}
for k, v in ann.items():
if k in self.cat2id.keys():
map_label2geom[self.cat2id[k]] = [LineString(np.array(l)[:, :3]) for l in v]
ego2img_rts = []
cams = sample['sensor']
for c in cams.values():
extrinsic, intrinsic = np.array(
c['extrinsic']), np.array(c['intrinsic'])
ego2cam_rt = extrinsic
viewpad = np.eye(4)
viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic
ego2cam_rt = (viewpad @ ego2cam_rt)
ego2img_rts.append(ego2cam_rt)
pose = sample['pose']
input_dict = {
'token': sample['timestamp'],
'img_filenames': [os.path.join(self.root_path, c['image_path']) for c in cams.values()],
# intrinsics are 3x3 Ks
'cam_intrinsics': [c['intrinsic'] for c in cams.values()],
            # extrinsics are 4x4 transform matrices, NOTE: **ego2cam**
'cam_extrinsics': [c['extrinsic'] for c in cams.values()],
'ego2img': ego2img_rts,
'ego2global_translation': pose['ego2global_translation'],
'ego2global_rotation': pose['ego2global_rotation'],
}
if not self.test_mode:
            input_dict.update({'map_geoms': map_label2geom}) # {0: List[ped_crossing(LineString)], 1: ...}
        return input_dict
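# --- illustrative sketch, not part of the original file ---
# How the ego2img matrices built in get_sample project points, assuming the
# stored extrinsic is an ego-to-camera 4x4 transform and the intrinsic a 3x3 K,
# as the comments above state. All numeric values here are hypothetical.
def _demo_ego2img():
    K = np.array([[1000., 0., 800.],
                  [0., 1000., 450.],
                  [0., 0., 1.]])
    ego2cam = np.eye(4)    # hypothetical identity extrinsic
    viewpad = np.eye(4)
    viewpad[:3, :3] = K    # pad K to 4x4, as in get_sample
    ego2img = viewpad @ ego2cam
    u, v, w = (ego2img @ np.array([1., 0., 2., 1.]))[:3]
    return u / w, v / w    # pixel coordinates: (1300.0, 450.0)
# --- end sketch ---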
import numpy as np
import os
import os.path as osp
import mmcv
from .evaluation.vector_eval import VectorEvaluate
from mmdet3d.datasets.pipelines import Compose
from mmdet.datasets import DATASETS
from torch.utils.data import Dataset
import warnings
warnings.filterwarnings("ignore")
@DATASETS.register_module()
class BaseMapDataset(Dataset):
"""Map dataset base class.
Args:
ann_file (str): annotation file path
cat2id (dict): category to class id
roi_size (tuple): bev range
eval_config (Config): evaluation config
meta (dict): meta information
pipeline (Config): data processing pipeline config,
interval (int): annotation load interval
work_dir (str): path to work dir
test_mode (bool): whether in test mode
"""
def __init__(self,
ann_file,
root_path,
cat2id,
roi_size,
meta,
pipeline,
interval=1,
work_dir=None,
test_mode=False,
):
super().__init__()
self.ann_file = ann_file
self.meta = meta
self.root_path = root_path
self.classes = list(cat2id.keys())
self.num_classes = len(self.classes)
self.cat2id = cat2id
self.interval = interval
self.load_annotations(self.ann_file)
self.idx2token = {}
for i, s in enumerate(self.samples):
if 'timestamp' in s:
self.idx2token[i] = s['timestamp']
else:
self.idx2token[i] = s['token']
self.token2idx = {v: k for k, v in self.idx2token.items()}
if pipeline is not None:
self.pipeline = Compose(pipeline)
else:
self.pipeline = None
# dummy flags to fit with mmdet dataset
self.flag = np.zeros(len(self), dtype=np.uint8)
self.roi_size = roi_size
self.work_dir = work_dir
self.test_mode = test_mode
def load_annotations(self, ann_file):
raise NotImplementedError
def get_sample(self, idx):
raise NotImplementedError
def format_results(self, results, denormalize=True, prefix=None):
'''Format prediction result to submission format.
Args:
results (list[Tensor]): List of prediction results.
denormalize (bool): whether to denormalize prediction from (0, 1) \
to bev range. Default: True
prefix (str): work dir prefix to save submission file.
Returns:
dict: Evaluation results
'''
meta = self.meta
submissions = {
'meta': meta,
'results': {},
}
for pred in results:
'''
For each case, the result should be formatted as Dict{'vectors': [], 'scores': [], 'labels': []}
            'vectors': List of vectors, each an array([[x1, y1], [x2, y2], ...]),
                containing all vectors predicted in this sample.
            'scores': List of scores (float),
                containing the scores of all instances in this sample.
            'labels': List of labels (int),
                containing the labels of all instances in this sample.
'''
if pred is None: # empty prediction
continue
single_case = {'vectors': [], 'scores': [], 'labels': []}
token = pred['token']
roi_size = np.array(self.roi_size)
origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])
for i in range(len(pred['scores'])):
score = pred['scores'][i]
label = pred['labels'][i]
vector = pred['vectors'][i]
# A line should have >=2 points
if len(vector) < 2:
continue
if denormalize:
eps = 2
vector = vector * (roi_size + eps) + origin
single_case['vectors'].append(vector)
single_case['scores'].append(score)
single_case['labels'].append(label)
submissions['results'][token] = single_case
out_path = osp.join(prefix, 'submission_vector.json')
        print(f'\nsaving submission results to {out_path}')
os.makedirs(os.path.dirname(out_path), exist_ok=True)
mmcv.dump(submissions, out_path)
return out_path
def evaluate(self, results, logger=None, **kwargs):
'''Evaluate prediction result based on `output_format` specified by dataset.
Args:
results (list[Tensor]): List of prediction results.
logger (logger): logger to print evaluation results.
Returns:
dict: Evaluation results.
'''
output_format = self.meta['output_format']
self.evaluator = VectorEvaluate(self.ann_file)
print('len of the results', len(results))
result_path = self.format_results(results, denormalize=True, prefix=self.work_dir)
result_dict = self.evaluator.evaluate(result_path, logger=logger)
return result_dict
def __len__(self):
"""Return the length of data infos.
Returns:
int: Length of data infos.
"""
return len(self.samples)
def _rand_another(self, idx):
"""Randomly get another item.
Returns:
int: Another index of item.
"""
        return np.random.choice(len(self))
def __getitem__(self, idx):
"""Get item from infos according to the given index.
Returns:
dict: Data dictionary of the corresponding index.
"""
input_dict = self.get_sample(idx)
data = self.pipeline(input_dict)
return data
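# --- illustrative sketch, not part of the original file ---
# Round trip of the denormalization used in format_results: predictions in
# (0, 1) are scaled by roi_size + eps and shifted to the BEV origin. The
# polyline below is hypothetical.
def _demo_denormalize():
    roi_size = np.array([60, 30])
    origin = -np.array([60 / 2, 30 / 2])
    eps = 2
    vector = np.array([[0.5, 0.5], [1.0, 0.0]])  # normalized polyline
    return vector * (roi_size + eps) + origin    # [[1., 1.], [32., -15.]]
# --- end sketch ---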
import numpy as np
from .distance import chamfer_distance, frechet_distance
from typing import List, Tuple, Union
from numpy.typing import NDArray
def average_precision(recalls, precisions, mode='area'):
"""Calculate average precision.
Args:
recalls (ndarray): shape (num_dets, )
precisions (ndarray): shape (num_dets, )
mode (str): 'area' or '11points', 'area' means calculating the area
under precision-recall curve, '11points' means calculating
the average precision of recalls at [0, 0.1, ..., 1]
Returns:
float: calculated average precision
"""
recalls = recalls[np.newaxis, :]
precisions = precisions[np.newaxis, :]
assert recalls.shape == precisions.shape and recalls.ndim == 2
num_scales = recalls.shape[0]
ap = 0.
if mode == 'area':
zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
ones = np.ones((num_scales, 1), dtype=recalls.dtype)
mrec = np.hstack((zeros, recalls, ones))
mpre = np.hstack((zeros, precisions, zeros))
for i in range(mpre.shape[1] - 1, 0, -1):
mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
ind = np.where(mrec[0, 1:] != mrec[0, :-1])[0]
ap = np.sum(
(mrec[0, ind + 1] - mrec[0, ind]) * mpre[0, ind + 1])
elif mode == '11points':
for thr in np.arange(0, 1 + 1e-3, 0.1):
            precs = precisions[0, recalls[0, :] >= thr]
prec = precs.max() if precs.size > 0 else 0
ap += prec
ap /= 11
else:
raise ValueError(
'Unrecognized mode, only "area" and "11points" are supported')
return ap
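# --- illustrative usage, not part of the original file ---
# Toy PR curve for average_precision in 'area' mode: precision 1.0 up to
# recall 0.5, then 0.5 up to recall 1.0, so the interpolated area is
# 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
def _demo_average_precision():
    recalls = np.array([0.5, 1.0])
    precisions = np.array([1.0, 0.5])
    return average_precision(recalls, precisions, mode='area')  # 0.75
# --- end usage ---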
def instance_match(pred_lines: List[NDArray],
scores: NDArray,
gt_lines: List[NDArray],
thresholds: Union[Tuple, List],
metric: str='chamfer') -> List:
"""Compute whether detected lines are true positive or false positive.
Args:
pred_lines (List): Detected lines of a sample, each line has shape (INTERP_NUM, 2 or 3).
scores (array): Confidence score of each line, of shape (M, ).
gt_lines (List): GT lines of a sample, each line has shape (INTERP_NUM, 2 or 3).
        thresholds (tuple or list): List of distance thresholds.
metric (str): Distance function for lines matching. Default: 'chamfer'.
Returns:
list_of_tp_fp (list): tp-fp matching result at all thresholds
"""
if metric == 'chamfer':
distance_fn = chamfer_distance
elif metric == 'frechet':
distance_fn = frechet_distance
else:
raise ValueError(f'unknown distance function {metric}')
num_preds = len(pred_lines)
num_gts = len(gt_lines)
# tp and fp
tp_fp_list = []
tp = np.zeros((num_preds), dtype=np.float32)
fp = np.zeros((num_preds), dtype=np.float32)
# if there is no gt lines in this sample, then all pred lines are false positives
if num_gts == 0:
fp[...] = 1
for thr in thresholds:
tp_fp_list.append((tp.copy(), fp.copy()))
return tp_fp_list
if num_preds == 0:
for thr in thresholds:
tp_fp_list.append((tp.copy(), fp.copy()))
return tp_fp_list
# distance matrix: M x N
matrix = np.zeros((num_preds, num_gts))
for i in range(num_preds):
for j in range(num_gts):
matrix[i, j] = distance_fn(pred_lines[i], gt_lines[j])
# for each det, the min distance with all gts
matrix_min = matrix.min(axis=1)
# for each det, which gt is the closest to it
matrix_argmin = matrix.argmin(axis=1)
# sort all dets in descending order by scores
sort_inds = np.argsort(-scores)
# match under different thresholds
for thr in thresholds:
tp = np.zeros((num_preds), dtype=np.float32)
fp = np.zeros((num_preds), dtype=np.float32)
gt_covered = np.zeros(num_gts, dtype=bool)
for i in sort_inds:
if matrix_min[i] <= thr:
matched_gt = matrix_argmin[i]
if not gt_covered[matched_gt]:
gt_covered[matched_gt] = True
tp[i] = 1
else:
fp[i] = 1
else:
fp[i] = 1
        tp_fp_list.append((tp, fp))
    return tp_fp_list
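# --- illustrative usage, not part of the original file ---
# One prediction with chamfer distance 0.5 to one GT line: a false positive at
# threshold 0.1, a true positive at threshold 1.0. Inputs are hypothetical.
def _demo_instance_match():
    pred = [np.array([[0.0, 0.0], [1.0, 0.0]])]
    gt = [np.array([[0.0, 0.5], [1.0, 0.5]])]
    scores = np.array([0.9])
    # returns one (tp, fp) pair per threshold: [([0.], [1.]), ([1.], [0.])]
    return instance_match(pred, scores, gt, thresholds=[0.1, 1.0])
# --- end usage ---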
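# Minimal sketch of instance_match on toy inputs (hypothetical lines, left
# uninterpolated for brevity): two near-identical predictions compete for one
# GT line, so the higher-scored one becomes the TP and the other an FP at
# every threshold.
if __name__ == '__main__':
    gt = np.array([[0.0, 0.0], [1.0, 0.0]])
    preds = [gt.copy(), gt + 0.1]
    scores = np.array([0.9, 0.8])
    for thr, (tp, fp) in zip([0.5, 1.0],
                             instance_match(preds, scores, [gt], [0.5, 1.0])):
        print(f'thr={thr}: tp={tp}, fp={fp}')  # tp=[1. 0.], fp=[0. 1.]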
from scipy.spatial import distance
from numpy.typing import NDArray
def chamfer_distance(line1: NDArray, line2: NDArray) -> float:
    ''' Calculate the chamfer distance between two lines. The lines are
    assumed to be densely interpolated beforehand.
Args:
line1 (array): coordinates of line1
line2 (array): coordinates of line2
Returns:
distance (float): chamfer distance
'''
dist_matrix = distance.cdist(line1, line2, 'euclidean')
dist12 = dist_matrix.min(-1).sum() / len(line1)
dist21 = dist_matrix.min(-2).sum() / len(line2)
return (dist12 + dist21) / 2
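# Worked toy example (hypothetical coordinates, assuming numpy is imported as
# np): two parallel 2-point segments offset by 1.0 in y give nearest-neighbour
# distance 1.0 everywhere, so both directed averages and the symmetric result
# are 1.0.
# >>> chamfer_distance(np.array([[0., 0.], [1., 0.]]),
# ...                  np.array([[0., 1.], [1., 1.]]))
# 1.0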
def frechet_distance(line1: NDArray, line2: NDArray) -> float:
    ''' Calculate the Fréchet distance between two lines. The lines are
    assumed to be densely interpolated beforehand. Currently unimplemented.
Args:
line1 (array): coordinates of line1
line2 (array): coordinates of line2
Returns:
distance (float): frechet distance
'''
raise NotImplementedError
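# frechet_distance is left unimplemented above. For reference, here is a
# minimal sketch of the classic discrete Fréchet distance (Eiter & Mannila,
# 1994); it is an illustrative stand-in, not the implementation intended by
# the authors of this repo.
def _discrete_frechet_sketch(line1: NDArray, line2: NDArray) -> float:
    import numpy as np
    # pairwise Euclidean distances between the two point sequences
    dist_matrix = distance.cdist(line1, line2, 'euclidean')
    n, m = dist_matrix.shape
    ca = np.zeros((n, m))
    ca[0, 0] = dist_matrix[0, 0]
    # along the borders the coupling can only advance on one curve
    for j in range(1, m):
        ca[0, j] = max(ca[0, j - 1], dist_matrix[0, j])
    for i in range(1, n):
        ca[i, 0] = max(ca[i - 1, 0], dist_matrix[i, 0])
    # dynamic programme over all monotone couplings of the two curves
    for i in range(1, n):
        for j in range(1, m):
            ca[i, j] = max(min(ca[i - 1, j], ca[i - 1, j - 1], ca[i, j - 1]),
                           dist_matrix[i, j])
    return float(ca[-1, -1])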
from functools import partial
from logging import Logger
from multiprocessing import Pool
from time import time
from typing import Dict, List, Optional
import mmcv
import numpy as np
import prettytable
from mmcv.utils import print_log
from numpy.typing import NDArray
from shapely.geometry import LineString
from .AP import instance_match, average_precision
INTERP_NUM = 100 # number of points to interpolate during evaluation
SAMPLE_DIST = 0.3 # fixed sample distance
THRESHOLDS = [0.5, 1.0, 1.5] # AP thresholds
N_WORKERS = 16 # number of parallel workers
CAT2ID = {
'ped_crossing': 0,
'divider': 1,
'boundary': 2,
}
class VectorEvaluate(object):
"""Evaluator for vectorized map.
Args:
        ann_file (str): path to the GT annotation file
        n_workers (int): number of parallel workers
"""
    def __init__(self, ann_file: str, n_workers: int = N_WORKERS) -> None:
        ann = mmcv.load(ann_file)
        gts = {}
        for seg_id, seq in ann.items():
            for frame in seq:
                # use a new name to avoid shadowing the loaded annotation dict
                frame_ann = {}
                for cat, vectors in frame['annotation'].items():
                    # only evaluate in 2 dimensions
                    frame_ann[cat] = [np.array(v)[:, :2] for v in vectors]
                gts[frame['timestamp']] = frame_ann
self.gts = gts
self.n_workers = n_workers
self.cat2id = CAT2ID
self.id2cat = {v: k for k, v in self.cat2id.items()}
def interp_fixed_num(self,
vector: NDArray,
num_pts: int) -> NDArray:
''' Interpolate a polyline.
Args:
vector (array): line coordinates, shape (M, 2)
            num_pts (int): number of points to sample along the line
Returns:
sampled_points (array): interpolated coordinates
'''
line = LineString(vector)
distances = np.linspace(0, line.length, num_pts)
sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze()
return sampled_points
def interp_fixed_dist(self,
vector: NDArray,
sample_dist: float) -> NDArray:
''' Interpolate a line at fixed interval.
Args:
            vector (array): line coordinates, shape (M, 2)
sample_dist (float): sample interval
Returns:
points (array): interpolated points, shape (N, 2)
'''
line = LineString(vector)
distances = list(np.arange(sample_dist, line.length, sample_dist))
# make sure to sample at least two points when sample_dist > line.length
distances = [0,] + distances + [line.length,]
sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze()
return sampled_points
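    # Usage sketch (toy polyline, hypothetical values): a straight 1 m segment
    # sampled at sample_dist=0.3 is cut at arc lengths [0, 0.3, 0.6, 0.9, 1.0],
    # giving a (5, 2) array; both endpoints are always kept.
    # >>> self.interp_fixed_dist(np.array([[0., 0.], [1., 0.]]), 0.3)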
def _evaluate_single(self,
pred_vectors: List,
scores: List,
groundtruth: List,
thresholds: List,
                         metric: str = 'chamfer') -> Dict[float, NDArray]:
''' Do single-frame matching for one class.
Args:
pred_vectors (List): List[vector(ndarray) (different length)],
scores (List): List[score(float)]
groundtruth (List): List of vectors
thresholds (List): List of thresholds
Returns:
tp_fp_score_by_thr (Dict): matching results at different thresholds
                e.g. {0.5: (M, 3), 1.0: (M, 3), 1.5: (M, 3)}
'''
pred_lines = []
# interpolate predictions
for vector in pred_vectors:
vector = np.array(vector)
# vector_interp = self.interp_fixed_num(vector, INTERP_NUM)
vector_interp = self.interp_fixed_dist(vector, SAMPLE_DIST)
pred_lines.append(vector_interp)
# interpolate groundtruth
gt_lines = []
for vector in groundtruth:
# vector_interp = self.interp_fixed_num(vector, INTERP_NUM)
vector_interp = self.interp_fixed_dist(vector, SAMPLE_DIST)
gt_lines.append(vector_interp)
scores = np.array(scores)
        tp_fp_list = instance_match(pred_lines, scores, gt_lines, thresholds, metric) # list of (tp, fp) pairs, one per threshold
tp_fp_score_by_thr = {}
for i, thr in enumerate(thresholds):
tp, fp = tp_fp_list[i]
tp_fp_score = np.hstack([tp[:, None], fp[:, None], scores[:, None]])
tp_fp_score_by_thr[thr] = tp_fp_score
        return tp_fp_score_by_thr # {0.5: (M, 3), 1.0: (M, 3), 1.5: (M, 3)}
def evaluate(self,
result_path: str,
metric: str='chamfer',
logger: Optional[Logger]=None) -> Dict[str, float]:
        ''' Evaluate a submission file and print evaluation results to `logger` if specified.
        The submission is aligned to the GT by sample tokens before evaluation; multiple
        workers are used to speed up matching.
Args:
result_path (str): path to submission file
metric (str): distance metric. Default: 'chamfer'
logger (Logger): logger to print evaluation result, Default: None
Returns:
            new_result_dict (Dict): evaluation results, AP for each category.
'''
results = mmcv.load(result_path)
results = results['results']
# re-group samples and gt by label
samples_by_cls = {label: [] for label in self.id2cat.keys()}
num_gts = {label: 0 for label in self.id2cat.keys()}
num_preds = {label: 0 for label in self.id2cat.keys()}
# align by token
for token, gt in self.gts.items():
if token in results.keys():
pred = results[token]
else:
pred = {'vectors': [], 'scores': [], 'labels': []}
# for every sample
vectors_by_cls = {label: [] for label in self.id2cat.keys()}
scores_by_cls = {label: [] for label in self.id2cat.keys()}
for i in range(len(pred['labels'])):
# i-th pred line in sample
label = pred['labels'][i]
vector = pred['vectors'][i]
score = pred['scores'][i]
vectors_by_cls[label].append(vector)
scores_by_cls[label].append(score)
for label, cat in self.id2cat.items():
new_sample = (vectors_by_cls[label], scores_by_cls[label], gt[cat])
num_gts[label] += len(gt[cat])
num_preds[label] += len(scores_by_cls[label])
samples_by_cls[label].append(new_sample)
result_dict = {}
print(f'\nevaluating {len(self.id2cat)} categories...')
start = time()
if self.n_workers > 0:
pool = Pool(self.n_workers)
sum_mAP = 0
pbar = mmcv.ProgressBar(len(self.id2cat))
for label in self.id2cat.keys():
samples = samples_by_cls[label] # List[(pred_lines, scores, gts)]
result_dict[self.id2cat[label]] = {
'num_gts': num_gts[label],
'num_preds': num_preds[label]
}
sum_AP = 0
fn = partial(self._evaluate_single, thresholds=THRESHOLDS, metric=metric)
if self.n_workers > 0:
tpfp_score_list = pool.starmap(fn, samples)
else:
tpfp_score_list = []
for sample in samples:
tpfp_score_list.append(fn(*sample))
for thr in THRESHOLDS:
tp_fp_score = [i[thr] for i in tpfp_score_list]
tp_fp_score = np.vstack(tp_fp_score) # (num_dets, 3)
sort_inds = np.argsort(-tp_fp_score[:, -1])
tp = tp_fp_score[sort_inds, 0] # (num_dets,)
fp = tp_fp_score[sort_inds, 1] # (num_dets,)
tp = np.cumsum(tp, axis=0)
fp = np.cumsum(fp, axis=0)
eps = np.finfo(np.float32).eps
recalls = tp / np.maximum(num_gts[label], eps)
precisions = tp / np.maximum((tp + fp), eps)
AP = average_precision(recalls, precisions, 'area')
sum_AP += AP
result_dict[self.id2cat[label]].update({f'AP@{thr}': AP})
pbar.update()
AP = sum_AP / len(THRESHOLDS)
sum_mAP += AP
            result_dict[self.id2cat[label]].update({'AP': AP})
        if self.n_workers > 0:
            pool.close()
            pool.join()
mAP = sum_mAP / len(self.id2cat.keys())
result_dict.update({'mAP': mAP})
print(f"finished in {time() - start:.2f}s")
# print results
table = prettytable.PrettyTable(['category', 'num_preds', 'num_gts'] +
[f'AP@{thr}' for thr in THRESHOLDS] + ['AP'])
for label in self.id2cat.keys():
table.add_row([
self.id2cat[label],
result_dict[self.id2cat[label]]['num_preds'],
result_dict[self.id2cat[label]]['num_gts'],
*[round(result_dict[self.id2cat[label]][f'AP@{thr}'], 4) for thr in THRESHOLDS],
round(result_dict[self.id2cat[label]]['AP'], 4),
])
print_log('\n'+str(table), logger=logger)
print_log(f'mAP = {mAP:.4f}\n', logger=logger)
new_result_dict = {}
for name in self.cat2id:
            new_result_dict[name] = result_dict[name]['AP']
        return new_result_dict
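# End-to-end usage sketch ('gts.json' and 'submission.json' are hypothetical
# paths; the submission file is expected to contain
# {'results': {token: {'vectors': ..., 'scores': ..., 'labels': ...}}} as
# consumed by evaluate() above):
# >>> evaluator = VectorEvaluate('gts.json', n_workers=0)
# >>> ap_by_cat = evaluator.evaluate('submission.json', metric='chamfer')
# >>> ap_by_cat  # {'ped_crossing': ..., 'divider': ..., 'boundary': ...}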
from .loading import LoadMultiViewImagesFromFiles
from .formating import FormatBundleMap
from .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D
from .vectorize import VectorizeMap
from .poly_bbox import PolygonizeLocalMapBbox
# for argoverse
__all__ = [
'LoadMultiViewImagesFromFiles',
'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages',
    'VectorizeMap', 'PolygonizeLocalMapBbox'
]
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet3d.core.points import BasePoints
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor
@PIPELINES.register_module()
class FormatBundleMap(object):
"""Format data for map tasks and then collect data for model input.
These fields are formatted as follows.
- img: (1) transpose, (2) to tensor, (3) to DataContainer (stack=True)
- semantic_mask (if exists): (1) to tensor, (2) to DataContainer (stack=True)
- vectors (if exists): (1) to DataContainer (cpu_only=True)
- img_metas: (1) to DataContainer (cpu_only=True)
"""
def __init__(self, process_img=True,
keys=['img', 'semantic_mask', 'vectors'],
meta_keys=['intrinsics', 'extrinsics']):
self.process_img = process_img
self.keys = keys
self.meta_keys = meta_keys
def __call__(self, results):
"""Call function to transform and format common fields in results.
Args:
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data that is formatted with
default bundle.
"""
# Format 3D data
if 'points' in results:
assert isinstance(results['points'], BasePoints)
results['points'] = DC(results['points'].tensor)
for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:
if key not in results:
continue
results[key] = DC(to_tensor(results[key]), stack=False)
if 'img' in results and self.process_img:
if isinstance(results['img'], list):
# process multiple imgs in single frame
imgs = [img.transpose(2, 0, 1) for img in results['img']]
imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
results['img'] = DC(to_tensor(imgs), stack=True)
else:
img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
results['img'] = DC(to_tensor(img), stack=True)
if 'semantic_mask' in results:
results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True)
if 'vectors' in results:
# vectors may have different sizes
vectors = results['vectors']
results['vectors'] = DC(vectors, stack=False, cpu_only=True)
if 'polys' in results:
results['polys'] = DC(results['polys'], stack=False, cpu_only=True)
return results
def __repr__(self):
"""str: Return a string that describes the module."""
repr_str = self.__class__.__name__
        repr_str += f'(process_img={self.process_img})'
return repr_str
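# Usage sketch: since FormatBundleMap is registered in PIPELINES, it would
# typically close a data pipeline in an mmdet3d config; the surrounding
# transforms below are illustrative, not prescribed by this repo:
# train_pipeline = [
#     dict(type='LoadMultiViewImagesFromFiles'),
#     dict(type='VectorizeMap'),
#     dict(type='FormatBundleMap'),
# ]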