Commit 41b18fd8 authored by zhe chen
Browse files

Use pre-commit to reformat code


Use pre-commit to reformat code
parent ff20ea39
......@@ -2,6 +2,7 @@ import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
@PIPELINES.register_module(force=True)
class LoadMultiViewImagesFromFiles(object):
"""Load multi channel images from a list of separate channel files.
......@@ -56,5 +57,5 @@ class LoadMultiViewImagesFromFiles(object):
def __repr__(self):
"""str: Return a string that describes the module."""
return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\
return f'{self.__class__.__name__} (to_float32={self.to_float32}, ' \
f"color_type='{self.color_type}')"
import numpy as np
from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString
@PIPELINES.register_module(force=True)
class PolygonizeLocalMapBbox(object):
"""Pre-Processing used by vectormapnet model.
......@@ -18,7 +18,7 @@ class PolygonizeLocalMapBbox(object):
canvas_size=(200, 100),
coord_dim=2,
num_class=3,
threshold=6/200,
threshold=6 / 200,
):
self.canvas_size = np.array(canvas_size)
......@@ -47,7 +47,7 @@ class PolygonizeLocalMapBbox(object):
polyline_weight = np.ones_like(polyline).reshape(-1)
polyline_weight = np.pad(
polyline_weight, ((0, 1),), constant_values=1.)
polyline_weight = polyline_weight/polyline_weight.sum()
polyline_weight = polyline_weight / polyline_weight.sum()
# flatten and quantize
fpolyline = quantize_verts(
......@@ -98,11 +98,11 @@ class PolygonizeLocalMapBbox(object):
qkp_msks = np.stack(qkp_masks)
# format det
kps = np.stack(kps, axis=0).astype(np.float32)*self.canvas_size
kps = np.stack(kps, axis=0).astype(np.float32) * self.canvas_size
kp_labels = np.array(kp_labels)
# restrict the boundary
kps[..., 0] = np.clip(kps[..., 0], 0.1, self.canvas_size[0]-0.1)
kps[..., 1] = np.clip(kps[..., 1], 0.1, self.canvas_size[1]-0.1)
kps[..., 0] = np.clip(kps[..., 0], 0.1, self.canvas_size[0] - 0.1)
kps[..., 1] = np.clip(kps[..., 1], 0.1, self.canvas_size[1] - 0.1)
# nbox, boxsize(4)*coord_dim(2)
kps = kps.reshape(kps.shape[0], -1)
......@@ -157,10 +157,9 @@ class PolygonizeLocalMapBbox(object):
def evaluate_line(polyline):
edge = np.linalg.norm(polyline[1:] - polyline[:-1], axis=-1)
start_end_weight = edge[(0, -1), ].copy()
start_end_weight = edge[(0, -1),].copy()
mid_weight = (edge[:-1] + edge[1:]) * .5
pts_weight = np.concatenate(
......@@ -172,9 +171,9 @@ def evaluate_line(polyline):
pts_weight /= denominator
# add weights for stop index
pts_weight = np.repeat(pts_weight, 2)/2
pts_weight = np.repeat(pts_weight, 2) / 2
pts_weight = np.pad(pts_weight, ((0, 1)),
constant_values=1/(len(polyline)*2))
constant_values=1 / (len(polyline) * 2))
return pts_weight
......@@ -216,10 +215,10 @@ def get_bbox(polyline, threshold):
polyline = LineString(polyline)
bbox = polyline.bounds
minx, miny, maxx, maxy = bbox
W, H = maxx-minx, maxy-miny
W, H = maxx - minx, maxy - miny
if W < threshold or H < threshold:
remain = max((threshold - min(W, H))/2, eps)
remain = max((threshold - min(W, H)) / 2, eps)
bbox = polyline.buffer(remain).envelope.bounds
minx, miny, maxx, maxy = bbox
......
import numpy as np
import mmcv
import numpy as np
from mmdet.datasets.builder import PIPELINES
......@@ -82,7 +81,8 @@ class PadMultiViewImages(object):
if self.change_intrinsics:
post_intrinsics, post_ego2imgs = [], []
for img, oshape, cam_intrinsic, ego2img in zip(results['img'], \
original_shape, results['cam_intrinsics'], results['ego2img']):
original_shape, results['cam_intrinsics'],
results['ego2img']):
scaleW = img.shape[1] / oshape[1]
scaleH = img.shape[0] / oshape[0]
......@@ -101,7 +101,6 @@ class PadMultiViewImages(object):
'ego2img': post_ego2imgs,
})
results['img_shape'] = [img.shape for img in padded_img]
results['img_fixed_size'] = self.size
results['img_size_divisor'] = self.size_divisor
......@@ -135,11 +134,12 @@ class ResizeMultiViewImages(object):
size (tuple, optional): resize target size, (h, w).
change_intrinsics (bool): whether to update intrinsics.
"""
def __init__(self, size, change_intrinsics=True):
self.size = size
self.change_intrinsics = change_intrinsics
def __call__(self, results:dict):
def __call__(self, results: dict):
new_imgs, post_intrinsics, post_ego2imgs = [], [], []
......
from typing import Dict, List, Tuple, Union
import numpy as np
from mmdet.datasets.builder import PIPELINES
from shapely.geometry import LineString
from numpy.typing import NDArray
from typing import List, Tuple, Union, Dict
from shapely.geometry import LineString
@PIPELINES.register_module(force=True)
class VectorizeMap(object):
......@@ -24,9 +26,9 @@ class VectorizeMap(object):
roi_size: Union[Tuple, List],
normalize: bool,
coords_dim: int,
simplify: bool=False,
sample_num: int=-1,
sample_dist: float=-1,
simplify: bool = False,
sample_num: int = -1,
sample_dist: float = -1,
):
self.coords_dim = coords_dim
self.sample_num = sample_num
......@@ -71,7 +73,7 @@ class VectorizeMap(object):
distances = list(np.arange(self.sample_dist, line.length, self.sample_dist))
# make sure to sample at least two points when sample_dist > line.length
distances = [0,] + distances + [line.length,]
distances = [0, ] + distances + [line.length, ]
sampled_points = np.array([list(line.interpolate(distance).coords)
for distance in distances]).squeeze()
......@@ -125,7 +127,7 @@ class VectorizeMap(object):
normalized (array): normalized points.
'''
origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])
origin = -np.array([self.roi_size[0] / 2, self.roi_size[1] / 2])
line[:, :2] = line[:, :2] - origin
......
from .backbones import *
from .heads import *
from .losses import *
from .mapers import *
from .transformer_utils import *
from .assigner import *
from .assigner import HungarianLinesAssigner
from .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesCost, LinesFixNumChamferCost, ClsSigmoidCost
import torch
from mmdet.core.bbox.assigners import AssignResult, BaseAssigner
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
from mmdet.core.bbox.assigners import AssignResult
from mmdet.core.bbox.assigners import BaseAssigner
from mmdet.core.bbox.match_costs import build_match_cost
try:
......@@ -110,7 +108,8 @@ class HungarianLinesAssigner(BaseAssigner):
matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
except:
print('cost max{}, min{}'.format(cost.max(), cost.min()))
import ipdb; ipdb.set_trace()
import ipdb
ipdb.set_trace()
matched_row_inds = torch.from_numpy(matched_row_inds).to(
preds['lines'].device)
matched_col_inds = torch.from_numpy(matched_col_inds).to(
......
import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.core.bbox.match_costs import build_match_cost
from mmdet.core.bbox.match_costs.builder import MATCH_COST
from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy
......@@ -83,7 +82,7 @@ class LinesFixNumChamferCost(object):
num_gts, num_bboxes = gt_lines.size(0), lines_pred.size(0)
dist_mat = lines_pred.new_full((num_bboxes, num_gts),
1.0,)
1.0, )
for i in range(num_bboxes):
for j in range(num_gts):
......@@ -212,6 +211,7 @@ class IoUCostC:
iou_cost = -overlaps
return iou_cost * self.weight
@MATCH_COST.register_module()
class DynamicLinesCost(object):
"""LinesL1Cost.
......@@ -273,7 +273,7 @@ class DynamicLinesCost(object):
m1 = m1.unsqueeze(1).sigmoid() > 0.5
m2 = m2.unsqueeze(0)
valid_points_mask = (m1 + m2)/2.
valid_points_mask = (m1 + m2) / 2.
average_factor_mask = valid_points_mask.sum(-1) > 0
average_factor = average_factor_mask.masked_fill(
......@@ -360,8 +360,7 @@ class MapQueriesCost(object):
# Iou
if self.iou_cost is not None:
iou_cost = self.iou_cost(preds['lines'],gts['lines'])
iou_cost = self.iou_cost(preds['lines'], gts['lines'])
cost += iou_cost
return cost
......@@ -6,8 +6,8 @@ import torch.nn.functional as F
class NoiseSythesis(nn.Module):
def __init__(self,
p, scale=0.01, shift_scale=(8,5),
scaling_size=(0.1,0.1), canvas_size=(200, 100),
p, scale=0.01, shift_scale=(8, 5),
scaling_size=(0.1, 0.1), canvas_size=(200, 100),
bbox_type='sce',
poly_coord_dim=2,
bbox_coord_dim=2,
......@@ -37,7 +37,7 @@ class NoiseSythesis(nn.Module):
dtype = bbox.dtype
B = bbox.shape[0]
noise = (torch.rand(B, device=device)*2-1)[:,None,None] # [-1,1]
noise = (torch.rand(B, device=device) * 2 - 1)[:, None, None] # [-1,1]
scale = self.scaling_size.to(device)
scale = (noise * scale) + 1
......@@ -45,7 +45,7 @@ class NoiseSythesis(nn.Module):
# recenterization
coffset = scaled_bbox.mean(-2) - bbox.float().mean(-2)
scaled_bbox = scaled_bbox - coffset[:,None]
scaled_bbox = scaled_bbox - coffset[:, None]
return scaled_bbox.round().type(dtype)
......@@ -60,7 +60,7 @@ class NoiseSythesis(nn.Module):
scale = (bbox.max(1)[0] - bbox.min(1)[0]) * 0.1
scale = torch.where(scale < shift_scale, scale, shift_scale)
noise = (torch.rand(batch_size, 2, device=device)*2-1) # [-1,1]
noise = (torch.rand(batch_size, 2, device=device) * 2 - 1) # [-1,1]
offset = (noise * scale).round().type(bbox.dtype)
shifted_bbox = bbox + offset[:, None]
......@@ -80,8 +80,8 @@ class NoiseSythesis(nn.Module):
noisy_bbox = noisy_bbox.round().type(dtype)
# prevent out of bound case
for i in range(self.bbox_coord_dim):
noisy_bbox[...,i] =\
torch.clamp(noisy_bbox[...,0],1,self.canvas_size[i])
noisy_bbox[..., i] = \
torch.clamp(noisy_bbox[..., 0], 1, self.canvas_size[i])
else:
noisy_bbox = noisy_bbox.type(torch.float)
......@@ -93,9 +93,9 @@ class NoiseSythesis(nn.Module):
batchsize = polyline.shape[0]
scale = self.canvas_size * self.scale
polyline = F.pad(polyline,(0,self.poly_coord_dim-1))
polyline = polyline.view(batchsize,-1, self.poly_coord_dim)
mask = F.pad(polyline_mask[:,1:],(0,self.poly_coord_dim))
polyline = F.pad(polyline, (0, self.poly_coord_dim - 1))
polyline = polyline.view(batchsize, -1, self.poly_coord_dim)
mask = F.pad(polyline_mask[:, 1:], (0, self.poly_coord_dim))
noisy_polyline = torch.normal(polyline.type(torch.float), scale)
......@@ -104,14 +104,14 @@ class NoiseSythesis(nn.Module):
# prevent out of bound case
for i in range(self.poly_coord_dim):
noisy_polyline[...,i] =\
torch.clamp(noisy_polyline[...,i],0,self.canvas_size[i])
noisy_polyline[..., i] = \
torch.clamp(noisy_polyline[..., i], 0, self.canvas_size[i])
else:
noisy_polyline = noisy_polyline.type(torch.float)
noisy_polyline = noisy_polyline.view(batchsize,-1) * mask
noisy_polyline = noisy_polyline[:,:-(self.poly_coord_dim-1)]
noisy_polyline = noisy_polyline.view(batchsize, -1) * mask
noisy_polyline = noisy_polyline[:, :-(self.poly_coord_dim - 1)]
return noisy_polyline
......@@ -125,11 +125,11 @@ class NoiseSythesis(nn.Module):
bbox = t(bbox)
# prevent out of bound case
bbox[...,0] =\
torch.clamp(bbox[...,0],0,self.canvas_size[0])
bbox[..., 0] = \
torch.clamp(bbox[..., 0], 0, self.canvas_size[0])
bbox[...,1] =\
torch.clamp(bbox[...,1],0,self.canvas_size[1])
bbox[..., 1] = \
torch.clamp(bbox[..., 1], 0, self.canvas_size[1])
return bbox
......@@ -143,8 +143,8 @@ class NoiseSythesis(nn.Module):
bbox = self.gaussian_noise_bbox(bbox)
fbbox_aug = bbox.view(seq_len, -1)
aug_mask = torch.rand(fbbox.shape,device=fbbox.device)
fbbox = torch.where(aug_mask<self.p, fbbox_aug, fbbox)
aug_mask = torch.rand(fbbox.shape, device=fbbox.device)
fbbox = torch.where(aug_mask < self.p, fbbox_aug, fbbox)
elif self.bbox_type == 'rxyxy':
fbbox = self.rbbox_aug(batch)
elif self.bbox_type == 'convex_hull':
......@@ -155,8 +155,8 @@ class NoiseSythesis(nn.Module):
polyline_mask = batch['polyline_masks']
polyline_aug = self.gaussian_noise_poly(polyline, polyline_mask)
aug_mask = torch.rand(polyline.shape,device=polyline.device)
polyline = torch.where(aug_mask<self.p, polyline_aug, polyline)
aug_mask = torch.rand(polyline.shape, device=polyline.device)
polyline = torch.where(aug_mask < self.p, polyline_aug, polyline)
return polyline, fbbox
......@@ -164,7 +164,7 @@ class NoiseSythesis(nn.Module):
return None
def convex_hull_aug(self,batch):
def convex_hull_aug(self, batch):
return None
......@@ -183,5 +183,4 @@ class NoiseSythesis(nn.Module):
aug_bbox_flat = aug_bbox.view(seq_len, -1)
return aug_bbox_flat
......@@ -4,17 +4,19 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from collections import OrderedDict
import torch
import torch.nn as nn
from collections import OrderedDict
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
from timm.models.layers import trunc_normal_, DropPath
from mmcv.runner import _load_checkpoint
from mmcv.cnn import constant_init, trunc_normal_init
from mmcv.runner import _load_checkpoint
from mmdet.models.builder import BACKBONES
from mmseg.utils import get_root_logger
from ops_dcnv3 import modules as opsm
import torch.nn.functional as F
from mmdet.models.builder import BACKBONES
from timm.models.layers import DropPath, trunc_normal_
class to_channels_first(nn.Module):
......@@ -185,7 +187,7 @@ class AttentiveBlock(nn.Module):
drop=0.,
attn_drop=0.,
drop_path=0.,
norm_layer="LN",
norm_layer='LN',
attn_head_dim=None,
out_dim=None):
super().__init__()
......@@ -575,7 +577,7 @@ class InternImage(nn.Module):
self.num_levels = len(depths)
self.depths = depths
self.channels = channels
self.num_features = int(channels * 2**(self.num_levels - 1))
self.num_features = int(channels * 2 ** (self.num_levels - 1))
self.post_norm = post_norm
self.mlp_ratio = mlp_ratio
self.init_cfg = init_cfg
......@@ -610,7 +612,7 @@ class InternImage(nn.Module):
i == 2) else None # for InternImage-H/G
level = InternImageBlock(
core_op=getattr(opsm, core_op),
channels=int(channels * 2**i),
channels=int(channels * 2 ** i),
depth=depths[i],
groups=groups[i],
mlp_ratio=self.mlp_ratio,
......
import copy
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmdet3d.models.builder import BACKBONES
from mmdet.models import build_backbone, build_neck
class UpsampleBlock(nn.Module):
def __init__(self, ins, outs):
super(UpsampleBlock, self).__init__()
......@@ -17,7 +18,6 @@ class UpsampleBlock(nn.Module):
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
x = self.conv(x)
x = self.relu(self.gn(x))
x = self.upsample2x(x)
......@@ -26,7 +26,7 @@ class UpsampleBlock(nn.Module):
def upsample2x(self, x):
_, _, h, w = x.shape
x = F.interpolate(x, size=(h*2, w*2),
x = F.interpolate(x, size=(h * 2, w * 2),
mode='bilinear', align_corners=True)
return x
......@@ -54,7 +54,7 @@ class Upsample(nn.Module):
continue
tmp = [copy.deepcopy(input_conv), ]
tmp += [copy.deepcopy(inter_conv) for i in range(layer_num-1)]
tmp += [copy.deepcopy(inter_conv) for i in range(layer_num - 1)]
fscale.append(nn.Sequential(*tmp))
self.fscale = nn.ModuleList(fscale)
......@@ -117,20 +117,20 @@ class IPMEncoder(nn.Module):
if self.use_lidar:
self.pp = PointPillarEncoder(lidar_dim, xbound, ybound, zbound)
self.outconvs =\
nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels//2,
self.outconvs = \
nn.Conv2d((self.upsample.out_channels + 3) * len(heights), out_channels // 2,
kernel_size=3, stride=1, padding=1) # same
if self.use_image:
_out_channels = out_channels//2
_out_channels = out_channels // 2
else:
_out_channels = out_channels
self.outconvs_lidar =\
self.outconvs_lidar = \
nn.Conv2d(lidar_dim, _out_channels,
kernel_size=3, stride=1, padding=1) # same
else:
self.outconvs =\
nn.Conv2d((self.upsample.out_channels+3)*len(heights), out_channels,
self.outconvs = \
nn.Conv2d((self.upsample.out_channels + 3) * len(heights), out_channels,
kernel_size=3, stride=1, padding=1) # same
self.init_weights(pretrained=pretrained)
......@@ -139,11 +139,10 @@ class IPMEncoder(nn.Module):
bev_planes = [construct_plane_grid(
xbound, ybound, h) for h in self.heights]
self.register_buffer('bev_planes', torch.stack(
bev_planes),) # nlvl,bH,bW,2
bev_planes), ) # nlvl,bH,bW,2
self.masked_embeds = nn.Embedding(len(heights), out_channels)
def init_weights(self, pretrained=None):
"""Initialize model weights."""
......@@ -225,7 +224,7 @@ class IPMEncoder(nn.Module):
if self.use_lidar:
lidar_feat = self.get_lidar_feature(points)
if self.use_image:
bev_feat = torch.cat([bev_feat,lidar_feat],dim=1)
bev_feat = torch.cat([bev_feat, lidar_feat], dim=1)
else:
bev_feat = lidar_feat
......@@ -250,7 +249,7 @@ class IPMEncoder(nn.Module):
# bev_grid_pos: B*ncam, nlvl*bH*bW, 2
bev_grid_pos, bev_cam_mask = get_campos(bev_grid, ego2cam, img_shape)
# B*cam, nlvl*bH, bW, 2
bev_grid_pos = bev_grid_pos.unflatten(-2, (nlvl*bH, bW))
bev_grid_pos = bev_grid_pos.unflatten(-2, (nlvl * bH, bW))
# project feat from 2D to bev plane
projected_feature = F.grid_sample(
......@@ -262,7 +261,7 @@ class IPMEncoder(nn.Module):
# eliminate the ncam
# The bev feature is the sum of the 6 cameras
bev_feat_mask = bev_feat_mask.unsqueeze(2)
projected_feature = (projected_feature*bev_feat_mask).sum(1)
projected_feature = (projected_feature * bev_feat_mask).sum(1)
num_feat = bev_feat_mask.sum(1)
projected_feature = projected_feature / \
......@@ -351,7 +350,7 @@ def get_campos(reference_points, ego2cam, img_shape):
eps = 1e-9
mask = (reference_points_cam[..., 2:3] > eps)
reference_points_cam =\
reference_points_cam = \
reference_points_cam[..., 0:2] / \
reference_points_cam[..., 2:3] + eps
......@@ -368,7 +367,7 @@ def get_campos(reference_points, ego2cam, img_shape):
# (B, num_cam, num_query)
mask = mask.view(B, num_cam, num_query)
reference_points_cam = reference_points_cam.view(B*num_cam, num_query, 2)
reference_points_cam = reference_points_cam.view(B * num_cam, num_query, 2)
return reference_points_cam, mask
......
from .base_map_head import BaseMapHead
from .dg_head import DGHead
from .map_element_detector import MapElementDetector
from .polyline_generator import PolylineGenerator
\ No newline at end of file
......@@ -3,7 +3,6 @@ from abc import ABCMeta, abstractmethod
import torch.nn as nn
from mmcv.runner import auto_fp16
from mmcv.utils import print_log
from mmdet.utils import get_root_logger
......@@ -24,7 +23,7 @@ class BaseMapHead(nn.Module, metaclass=ABCMeta):
logger = get_root_logger()
print_log(f'load model from: {pretrained}', logger=logger)
@auto_fp16(apply_to=('img', ))
@auto_fp16(apply_to=('img',))
def forward(self, *args, **kwargs):
pass
......
# The causal layer is credited to https://github.com/alexmt-scale/causal-transformer-decoder;
# we made some changes to fit it to PolyGen.
import torch
import torch.nn as nn
from typing import Optional
from torch import Tensor
import torch
import torch.nn as nn
from mmcv.cnn.bricks.registry import ATTENTION
from mmcv.utils import build_from_cfg
from torch import Tensor
def build_attention(cfg, default_args=None):
......@@ -58,7 +58,7 @@ class CausalTransformerDecoder(nn.TransformerDecoder):
if self.training:
if cache is not None:
raise ValueError(
"cache parameter should be None in training mode")
'cache parameter should be None in training mode')
for mod in self.layers:
output = mod(
output,
......@@ -140,7 +140,7 @@ class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
"""
if not self.norm_first:
raise ValueError(
"norm_first parameter should be True!")
'norm_first parameter should be True!')
if self.training:
# the official Pytorch implementation
......@@ -155,7 +155,7 @@ class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
x = x + self.res_weight2 * \
self._mha_block(self.norm2(x), memory,
memory_mask, memory_key_padding_mask)
x = x + self.res_weight3*self._ff_block(self.norm3(x))
x = x + self.res_weight3 * self._ff_block(self.norm3(x))
return x
......@@ -189,7 +189,7 @@ class CausalTransformerDecoderLayer(nn.TransformerDecoderLayer):
memory_mask, memory_key_padding_mask)
# final feed-forward network
x = x + self.res_weight3*self._ff_block(self.norm3(x))
x = x + self.res_weight3 * self._ff_block(self.norm3(x))
return x
......@@ -235,7 +235,8 @@ class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer):
self.norm_first = norm_first
def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
def forward(self, src: Tensor, src_mask: Optional[Tensor] = None,
src_key_padding_mask: Optional[Tensor] = None) -> Tensor:
r"""Pass the input through the encoder layer.
Args:
src: the sequence to the encoder layer (required).
......@@ -249,13 +250,13 @@ class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer):
x = src
if self.norm_first:
x = x + self.res_weight1*self._sa_block(self.norm1(x), src_mask,
x = x + self.res_weight1 * self._sa_block(self.norm1(x), src_mask,
src_key_padding_mask)
x = x + self.res_weight2*self._ff_block(self.norm2(x))
x = x + self.res_weight2 * self._ff_block(self.norm2(x))
else:
x = self.norm1(
x + self.res_weight1*self._sa_block(x, src_mask, src_key_padding_mask))
x = self.norm2(x + self.res_weight2*self._ff_block(x))
x + self.res_weight1 * self._sa_block(x, src_mask, src_key_padding_mask))
x = self.norm2(x + self.res_weight2 * self._ff_block(x))
return x
......@@ -274,12 +275,12 @@ class PolygenTransformerEncoderLayer(nn.TransformerEncoderLayer):
return self.dropout2(x)
def generate_square_subsequent_mask(sz: int, device: str = "cpu") -> torch.Tensor:
def generate_square_subsequent_mask(sz: int, device: str = 'cpu') -> torch.Tensor:
""" Generate the attention mask for causal decoding """
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
mask = (
mask.float()
.masked_fill(mask == 0, float("-inf"))
.masked_fill(mask == 0, float('-inf'))
.masked_fill(mask == 1, float(0.0))
).to(device=device)
return mask
......@@ -2,17 +2,19 @@ import torch
import torch.nn.functional as F
from torch import Tensor
def generate_square_subsequent_mask(sz: int, condition_len: int = 1, bool_out=False, device: str = "cpu") -> torch.Tensor:
def generate_square_subsequent_mask(sz: int, condition_len: int = 1, bool_out=False,
device: str = 'cpu') -> torch.Tensor:
""" Generate the attention mask for causal decoding """
mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
if condition_len > 1:
mask[:condition_len,:condition_len] = 1
mask[:condition_len, :condition_len] = 1
if not bool_out:
mask = (
mask.float()
.masked_fill(mask == 0, float("-inf"))
.masked_fill(mask == 0, float('-inf'))
.masked_fill(mask == 1, float(0.0)))
return mask.to(device=device)
......@@ -39,7 +41,7 @@ def quantize_verts(
"""
min_range = -1
max_range = 1
range_quantize = canvas_size-1
range_quantize = canvas_size - 1
verts_ratio = (verts - min_range) / (
max_range - min_range)
......@@ -56,7 +58,7 @@ def top_k_logits(logits, k):
values, _ = torch.topk(logits, k=k)
k_largest = torch.min(values)
logits = torch.where(logits < k_largest,
torch.ones_like(logits)*-1e9, logits)
torch.ones_like(logits) * -1e9, logits)
return logits
......
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import Conv2d, Linear
from mmcv.cnn import Linear
from mmcv.runner import force_fp32
from torch.distributions.categorical import Categorical
from mmdet.core import multi_apply, reduce_mean
from mmdet.models import HEADS
from torch.distributions.categorical import Categorical
from .detr_head import DETRMapFixedNumHead
......@@ -21,7 +22,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
self.separate_detect = separate_detect
self.discrete_output = discrete_output
self.bbox_size = 3 if mode=='sce' else 2
self.bbox_size = 3 if mode == 'sce' else 2
if bbox_size is not None:
self.bbox_size = bbox_size
self.coord_dim = coord_dim # for xyz
......@@ -42,12 +43,12 @@ class DETRBboxHead(DETRMapFixedNumHead):
self.img_coord_embed = nn.Linear(2, self.embed_dims)
def _init_branch(self,):
def _init_branch(self, ):
"""Initialize classification branch and regression branch of head."""
# add sigmoid or not
if self.separate_detect:
if self.cls_out_channels == self.num_classes+1:
if self.cls_out_channels == self.num_classes + 1:
self.cls_out_channels = 2
else:
self.cls_out_channels = 1
......@@ -62,10 +63,10 @@ class DETRBboxHead(DETRMapFixedNumHead):
if self.discrete_output:
reg_branch.append(nn.Linear(
self.embed_dims, max(self.canvas_size), bias=True,))
self.embed_dims, max(self.canvas_size), bias=True, ))
else:
reg_branch.append(nn.Linear(
self.embed_dims, self.bbox_size*self.coord_dim, bias=True,))
self.embed_dims, self.bbox_size * self.coord_dim, bias=True, ))
reg_branch = nn.Sequential(*reg_branch)
......@@ -133,7 +134,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
[nb_dec, bs, num_query, num_points, 2].
'''
(global_context_embedding, sequential_context_embeddings) =\
(global_context_embedding, sequential_context_embeddings) = \
self._prepare_context(batch, context)
if self.separate_detect:
......@@ -166,18 +167,18 @@ class DETRBboxHead(DETRMapFixedNumHead):
pos = []
for i in range(4):
pos_embeds = self.bbox_embedding.weight[i]
_pos = self.pre_branches['reg'](query_feat+pos_embeds)
_pos = self.pre_branches['reg'](query_feat + pos_embeds)
pos.append(_pos)
# # y mask
# _vert_mask = torch.arange(logits.shape[-1], device=logits.device)
# vertices_mask_y = (_vert_mask < self.canvas_size[1]+1)
# logits[:,1::2] = logits[:,1::2]*vertices_mask_y - ~vertices_mask_y*1e9
logits = torch.stack(pos, dim=-2)/1.
logits = torch.stack(pos, dim=-2) / 1.
lines = Categorical(logits=logits)
else:
lines = self.pre_branches['reg'](query_feat).sigmoid()
lines = lines.unflatten(-1, (self.bbox_size, self.coord_dim))*self.canvas_size
lines = lines.unflatten(-1, (self.bbox_size, self.coord_dim)) * self.canvas_size
lines = lines.flatten(-2)
return dict(
......@@ -220,7 +221,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
num_pred_lines = len(lines_pred)
# assigner and sampler
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred, ),
gts=dict(lines=gt_lines,
labels=gt_labels, ),
gt_bboxes_ignore=gt_bboxes_ignore)
......@@ -232,10 +233,10 @@ class DETRBboxHead(DETRMapFixedNumHead):
# label targets 0: foreground, 1: background
if self.separate_detect:
labels = gt_lines.new_full((num_pred_lines, ), 1, dtype=torch.long)
labels = gt_lines.new_full((num_pred_lines,), 1, dtype=torch.long)
else:
labels = gt_lines.new_full(
(num_pred_lines, ), self.num_classes, dtype=torch.long)
(num_pred_lines,), self.num_classes, dtype=torch.long)
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
label_weights = gt_lines.new_ones(num_pred_lines)
......@@ -308,7 +309,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
(labels_list, label_weights_list,
lines_targets_list, lines_weights_list,
pos_inds_list, neg_inds_list,pos_gt_inds_list) = multi_apply(
pos_inds_list, neg_inds_list, pos_gt_inds_list) = multi_apply(
self._get_target_single,
preds['scores'], lines_pred,
class_label, bbox,
......@@ -351,7 +352,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
"""
# Get target for each sample
new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\
new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list = \
self.get_targets(preds, gts, gt_bboxes_ignore_list)
# Batched all data
......@@ -386,7 +387,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
# position NLL loss
if self.discrete_output:
loss_reg = -(preds['lines'].log_prob(new_gts['bboxs']) *
new_gts['bboxs_weights']).sum()/(num_total_pos)
new_gts['bboxs_weights']).sum() / (num_total_pos)
else:
loss_reg = self.reg_loss(
preds['lines'], new_gts['bboxs'], new_gts['bboxs_weights'], avg_factor=num_total_pos)
......@@ -408,9 +409,9 @@ class DETRBboxHead(DETRMapFixedNumHead):
pos_msk = label == 0
neg_msk = ~pos_msk
loss_cls = -(p.log()*pos_msk + (1-p).log()*neg_msk)
loss_cls = -(p.log() * pos_msk + (1 - p).log() * neg_msk)
loss_cls = (loss_cls * weights).sum()/cls_avg_factor
loss_cls = (loss_cls * weights).sum() / cls_avg_factor
return loss_cls
......@@ -465,7 +466,7 @@ class DETRBboxHead(DETRMapFixedNumHead):
result_dict['bbox'].append(det_preds)
result_dict['scores'].append(scores)
result_dict['labels'].append(det_labels)
result_dict['lines_bs_idx'].extend([i]*nline)
result_dict['lines_bs_idx'].extend([i] * nline)
# for down stream polyline
_bboxs = torch.cat(result_dict['bbox'], dim=0)
......
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
import copy
from mmdet.models import HEADS
from mmcv.cnn import Conv2d
from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob
from mmcv.cnn import (Conv2d, Linear, bias_init_with_prob,
build_activation_layer)
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmdet.models.utils import build_transformer
from mmcv.runner import force_fp32
from mmdet.core import (multi_apply, build_assigner, build_sampler,
reduce_mean)
from mmdet.models.utils.transformer import inverse_sigmoid
from mmdet.models import build_loss
from mmdet.core import build_assigner, build_sampler, multi_apply, reduce_mean
from mmdet.models import HEADS, build_loss
from mmdet.models.utils import build_transformer
from .base_map_head import BaseMapHead
......@@ -60,14 +57,14 @@ class DETRMapFixedNumHead(BaseMapHead):
if loss_cls['use_sigmoid']:
self.cls_out_channels = num_classes
else:
self.cls_out_channels = num_classes+1
self.cls_out_channels = num_classes + 1
self.iterative = iterative
self.num_reg_fcs = num_reg_fcs
if patch_size is not None:
self.register_buffer('patch_size', torch.tensor(
(patch_size[1], patch_size[0])),)
(patch_size[1], patch_size[0])), )
self._build_transformer(transformer, positional_encoding)
......@@ -104,7 +101,7 @@ class DETRMapFixedNumHead(BaseMapHead):
self.transformer = build_transformer(transformer)
self.embed_dims = self.transformer.embed_dims
def _init_branch(self,):
def _init_branch(self, ):
"""Initialize classification branch and regression branch of head."""
fc_cls = Linear(self.embed_dims, self.cls_out_channels)
......@@ -114,8 +111,9 @@ class DETRMapFixedNumHead(BaseMapHead):
reg_branch.append(Linear(self.embed_dims, self.embed_dims))
reg_branch.append(nn.LayerNorm(self.embed_dims))
reg_branch.append(nn.ReLU())
reg_branch.append(Linear(self.embed_dims, self.num_points*2))
reg_branch.append(Linear(self.embed_dims, self.num_points * 2))
reg_branch = nn.Sequential(*reg_branch)
# add sigmoid or not
def _get_clones(module, N):
......@@ -185,7 +183,6 @@ class DETRMapFixedNumHead(BaseMapHead):
outputs = []
for i, query_feat in enumerate(outs_dec):
ocls = self.pre_branches['cls'](query_feat)
oreg = self.pre_branches['reg'](query_feat)
oreg = oreg.unflatten(dim=2, sizes=(self.num_points, 2))
......@@ -235,7 +232,7 @@ class DETRMapFixedNumHead(BaseMapHead):
num_pred_lines = lines_pred.size(0)
# assigner and sampler
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred, ),
gts=dict(lines=gt_lines,
labels=gt_labels, ),
gt_bboxes_ignore=gt_bboxes_ignore)
......@@ -245,7 +242,7 @@ class DETRMapFixedNumHead(BaseMapHead):
neg_inds = sampling_result.neg_inds
# label targets
labels = gt_lines.new_full((num_pred_lines, ),
labels = gt_lines.new_full((num_pred_lines,),
self.num_classes,
dtype=torch.long)
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
......@@ -339,7 +336,7 @@ class DETRMapFixedNumHead(BaseMapHead):
"""
# get target for each sample
new_gts, num_total_pos, num_total_neg, pos_inds_list =\
new_gts, num_total_pos, num_total_neg, pos_inds_list = \
self.get_targets(preds, gts, gt_bboxes_ignore_list)
# batched all data
......@@ -368,7 +365,8 @@ class DETRMapFixedNumHead(BaseMapHead):
lines_preds = preds['lines'].reshape(-1, self.num_points, 2)
if reduction == 'none': # For performance analysis
loss_reg = self.reg_loss(
lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], reduction_override=reduction, avg_factor=num_total_pos)
lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], reduction_override=reduction,
avg_factor=num_total_pos)
else:
loss_reg = self.reg_loss(
lines_preds, new_gts['lines_targets'], new_gts['lines_weights'], avg_factor=num_total_pos)
......
import copy
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import Linear, bias_init_with_prob, build_activation_layer
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmcv.runner import force_fp32
from mmdet.models import HEADS, build_head, build_loss
from mmdet.models import HEADS, build_head
from mmdet.models.utils import build_transformer
from mmdet.models.utils.transformer import inverse_sigmoid
from .base_map_head import BaseMapHead
import numpy as np
from ..augmentation.sythesis_det import NoiseSythesis
from .base_map_head import BaseMapHead
@HEADS.register_module(force=True)
class DGHead(BaseMapHead):
......@@ -46,7 +41,7 @@ class DGHead(BaseMapHead):
self.augmentation = None
if augmentation:
augmentation_kwargs.update({'canvas_size':gen_net_cfg.canvas_size})
augmentation_kwargs.update({'canvas_size': gen_net_cfg.canvas_size})
self.augmentation = NoiseSythesis(**augmentation_kwargs)
self.joint_training = joint_training
......@@ -77,8 +72,8 @@ class DGHead(BaseMapHead):
if only_det: return outs, losses_dict
if self.augmentation is not None:
polylines, bbox_flat =\
self.augmentation(batch['gen'],simple_aug=True)
polylines, bbox_flat = \
self.augmentation(batch['gen'], simple_aug=True)
if bbox_flat is None:
bbox_flat = batch['gen']['bbox_flat']
......@@ -110,7 +105,7 @@ class DGHead(BaseMapHead):
det_match_gt_idx = det_match_gt_idxs[-1]
_bboxs = []
for i, (match_idx, bbox) in enumerate(zip(det_match_idx,pred_bbox)):
for i, (match_idx, bbox) in enumerate(zip(det_match_idx, pred_bbox)):
_bboxs.append(bbox[match_idx])
_bboxs[-1] = _bboxs[-1][torch.argsort(det_match_gt_idx[i])]
......@@ -121,15 +116,15 @@ class DGHead(BaseMapHead):
torch.round(_bboxs).type(torch.int32)
# gen_input['bbox_flat'] = _bboxs
remain_idx = torch.randperm(_bboxs.shape[0])[:int(_bboxs.shape[0]*0.2)]
remain_idx = torch.randperm(_bboxs.shape[0])[:int(_bboxs.shape[0] * 0.2)]
# for data efficient
for k in gen_input.keys():
if k == 'bbox_flat':
gen_input[k] = torch.cat((_bboxs,gen_input[k][remain_idx]),dim=0)
gen_input[k] = torch.cat((_bboxs, gen_input[k][remain_idx]), dim=0)
else:
gen_input[k] = torch.cat((gen_input[k],gen_input[k][remain_idx]),dim=0)
gen_input[k] = torch.cat((gen_input[k], gen_input[k][remain_idx]), dim=0)
if isinstance(context['bev_embeddings'],tuple):
if isinstance(context['bev_embeddings'], tuple):
context['bev_embeddings'] = context['bev_embeddings'][0]
poly_dict = self.gen_net(gen_input, context=context)
......@@ -141,7 +136,7 @@ class DGHead(BaseMapHead):
if self.joint_training:
for k in batch['gen'].keys():
batch['gen'][k] = \
torch.cat((batch['gen'][k],batch['gen'][k][remain_idx]),dim=0)
torch.cat((batch['gen'][k], batch['gen'][k][remain_idx]), dim=0)
gen_losses_dict = \
self.loss_gen(batch, outs)
......@@ -159,7 +154,7 @@ class DGHead(BaseMapHead):
self.det_net.loss(gt['det'], pred['bbox'])
for k, v in det_loss_dict.items():
loss_dict['det_'+k] = v
loss_dict['det_' + k] = v
return loss_dict, det_match_idx, det_match_gt_idx
......@@ -171,7 +166,7 @@ class DGHead(BaseMapHead):
gen_loss_dict = self.gen_net.loss(gt['gen'], pred['polylines'])
for k, v in gen_loss_dict.items():
loss_dict['gen_'+k] = v
loss_dict['gen_' + k] = v
return loss_dict
......@@ -180,7 +175,7 @@ class DGHead(BaseMapHead):
pass
@torch.no_grad()
def inference(self, batch: dict={}, context: dict={}, gt_condition=False, **kwargs):
def inference(self, batch: dict = {}, context: dict = {}, gt_condition=False, **kwargs):
'''
num_samples_batch: number of sample per batch (batch size)
'''
......@@ -193,7 +188,7 @@ class DGHead(BaseMapHead):
if len(outs['lines_bs_idx']) == 0:
return None
if isinstance(context['bev_embeddings'],tuple):
if isinstance(context['bev_embeddings'], tuple):
context['bev_embeddings'] = context['bev_embeddings'][0]
poly_dict = self.gen_net(outs,
......@@ -206,7 +201,7 @@ class DGHead(BaseMapHead):
return outs
def post_process(self, preds: dict, tokens, gts:dict=None, **kwargs):
def post_process(self, preds: dict, tokens, gts: dict = None, **kwargs):
'''
Args:
XXX
......@@ -216,7 +211,7 @@ class DGHead(BaseMapHead):
range_size = self.gen_net.canvas_size.cpu().numpy()
coord_dim = self.gen_net.coord_dim
gen_net_name = self.gen_net.name if hasattr(self.gen_net,'name') else 'gen'
gen_net_name = self.gen_net.name if hasattr(self.gen_net, 'name') else 'gen'
ret_list = []
for batch_idx in range(len(tokens)):
......@@ -227,7 +222,7 @@ class DGHead(BaseMapHead):
det_gt = None
if gts is not None:
det_gt, rec_groundtruth = pack_groundtruth(
batch_idx,gts,tokens,range_size,gen_net_name,coord_dim=coord_dim)
batch_idx, gts, tokens, range_size, gen_net_name, coord_dim=coord_dim)
bbox_res = {
# 'bboxes': preds['bbox'][batch_idx].detach().cpu().numpy(),
......@@ -238,7 +233,6 @@ class DGHead(BaseMapHead):
}
ret_dict_single.update(bbox_res)
# for gen results.
batch2seq = np.nonzero(
preds['lines_bs_idx'].cpu().numpy() == batch_idx)[0]
......@@ -249,13 +243,12 @@ class DGHead(BaseMapHead):
})
for i in batch2seq:
pre = preds['polylines'][i].detach().cpu().numpy()
pre_msk = preds['polyline_masks'][i].detach().cpu().numpy()
valid_idx = np.nonzero(pre_msk)[0][:-1]
# From [200,1] to [199,0] to (1,0)
line = (pre[valid_idx].reshape(-1, coord_dim) - 1) / (range_size-1)
line = (pre[valid_idx].reshape(-1, coord_dim) - 1) / (range_size - 1)
ret_dict_single['vectors'].append(line)
......@@ -266,8 +259,8 @@ class DGHead(BaseMapHead):
return ret_list
def pack_groundtruth(batch_idx,gts,tokens,range_size,gen_net_name='gen',coord_dim=2):
def pack_groundtruth(batch_idx, gts, tokens, range_size, gen_net_name='gen', coord_dim=2):
if 'keypoints' in gts['det']:
gt_bbox = \
gts['det']['keypoints'][batch_idx].detach().cpu().numpy()
......@@ -290,7 +283,7 @@ def pack_groundtruth(batch_idx,gts,tokens,range_size,gen_net_name='gen',coord_di
}
for i in batch2seq:
gt_line =\
gt_line = \
gts['gen']['polylines'].detach().cpu().numpy()[i]
gt_msk = gts['gen']['polyline_masks'].detach().cpu().numpy()[i]
if gen_net_name == 'gen_gmm':
......@@ -299,7 +292,7 @@ def pack_groundtruth(batch_idx,gts,tokens,range_size,gen_net_name='gen',coord_di
valid_idx = np.nonzero(gt_msk)[0][:-1]
# From [200,1] to [199,0] to (1,0)
line = (gt_line[valid_idx].reshape(-1, coord_dim) - 1) / (range_size-1)
line = (gt_line[valid_idx].reshape(-1, coord_dim) - 1) / (range_size - 1)
ret_groundtruth['lines'].append(line)
return det_gt, ret_groundtruth
import copy
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import Conv2d, Linear
from mmcv.runner import force_fp32
from torch.distributions.categorical import Categorical
from mmdet.core import (multi_apply, build_assigner, build_sampler,
reduce_mean)
from mmdet.models import HEADS
from .detr_bbox import DETRBboxHead
from mmdet.models.utils.transformer import inverse_sigmoid
from mmdet.models import build_loss
from mmcv.cnn import Linear, build_activation_layer, bias_init_with_prob
from mmcv.cnn import (Conv2d, Linear, bias_init_with_prob,
build_activation_layer)
from mmcv.cnn.bricks.transformer import build_positional_encoding
from mmcv.runner import force_fp32
from mmdet.core import build_assigner, build_sampler, multi_apply, reduce_mean
from mmdet.models import HEADS, build_loss
from mmdet.models.utils import build_transformer
from mmdet.models.utils.transformer import inverse_sigmoid
@HEADS.register_module(force=True)
class MapElementDetector(nn.Module):
......@@ -41,7 +38,7 @@ class MapElementDetector(nn.Module):
positional_encoding: dict = None,
loss_cls: dict = None,
loss_reg: dict = None,
train_cfg: dict = None,):
train_cfg: dict = None, ):
super().__init__()
......@@ -65,7 +62,7 @@ class MapElementDetector(nn.Module):
if loss_cls['use_sigmoid']:
self.cls_out_channels = num_classes
else:
self.cls_out_channels = num_classes+1
self.cls_out_channels = num_classes + 1
self.iterative = iterative
self.num_reg_fcs = num_reg_fcs
......@@ -82,7 +79,7 @@ class MapElementDetector(nn.Module):
self.separate_detect = separate_detect
self.discrete_output = discrete_output
self.bbox_size = 3 if mode=='sce' else 2
self.bbox_size = 3 if mode == 'sce' else 2
if bbox_size is not None:
self.bbox_size = bbox_size
self.coord_dim = coord_dim # for xyz
......@@ -115,16 +112,16 @@ class MapElementDetector(nn.Module):
# query_pos_embed & query_embed
self.query_embedding = nn.Embedding(self.num_query,
self.embed_dims*2)
self.embed_dims * 2)
# for bbox parameter xstart, ystart, xend, yend
self.bbox_embedding = nn.Embedding( self.bbox_size,
self.embed_dims*2)
self.bbox_embedding = nn.Embedding(self.bbox_size,
self.embed_dims * 2)
def _init_branch(self,):
def _init_branch(self, ):
"""Initialize classification branch and regression branch of head."""
fc_cls = Linear(self.embed_dims*self.bbox_size, self.cls_out_channels)
fc_cls = Linear(self.embed_dims * self.bbox_size, self.cls_out_channels)
# fc_cls = Linear(self.embed_dims, self.cls_out_channels)
reg_branch = []
......@@ -135,12 +132,13 @@ class MapElementDetector(nn.Module):
if self.discrete_output:
reg_branch.append(nn.Linear(
self.embed_dims, max(self.canvas_size), bias=True,))
self.embed_dims, max(self.canvas_size), bias=True, ))
else:
reg_branch.append(nn.Linear(
self.embed_dims, self.coord_dim, bias=True,))
self.embed_dims, self.coord_dim, bias=True, ))
reg_branch = nn.Sequential(*reg_branch)
# add sigmoid or not
def _get_clones(module, N):
......@@ -240,28 +238,28 @@ class MapElementDetector(nn.Module):
[nb_dec, bs, num_query, num_points, 2].
'''
(global_context_embedding, sequential_context_embeddings) =\
(global_context_embedding, sequential_context_embeddings) = \
self._prepare_context(context)
x = sequential_context_embeddings
B, C, H, W = x.shape
query_embedding = self.query_embedding.weight[None,:,None].repeat(B, 1, self.bbox_size, 1)
query_embedding = self.query_embedding.weight[None, :, None].repeat(B, 1, self.bbox_size, 1)
bbox_embed = self.bbox_embedding.weight
query_embedding = query_embedding + bbox_embed[None,None]
query_embedding = query_embedding.view(B, -1, C*2)
query_embedding = query_embedding + bbox_embed[None, None]
query_embedding = query_embedding.view(B, -1, C * 2)
img_masks = x.new_zeros((B, H, W))
pos_embed = self.positional_encoding(img_masks)
# outs_dec: [nb_dec, bs, num_query, embed_dim]
hs, init_reference, inter_references = self.transformer(
[x,],
[x, ],
[img_masks.type(torch.bool)],
query_embedding,
[pos_embed],
reg_branches= self.reg_branches if self.iterative else None, # noqa:E501
cls_branches= None, # noqa:E501
reg_branches=self.reg_branches if self.iterative else None, # noqa:E501
cls_branches=None, # noqa:E501
)
outs_dec = hs.permute(0, 2, 1, 3)
......@@ -271,22 +269,22 @@ class MapElementDetector(nn.Module):
reference = init_reference
else:
reference = inter_references[i - 1]
outputs.append(self.get_prediction(i,query_feat,reference))
outputs.append(self.get_prediction(i, query_feat, reference))
return outputs
def get_prediction(self, level, query_feat, reference):
bs, num_query, h = query_feat.shape
query_feat = query_feat.view(bs, -1, self.bbox_size,h)
query_feat = query_feat.view(bs, -1, self.bbox_size, h)
ocls = self.pre_branches['cls'][level](query_feat.flatten(-2))
# ocls = ocls.mean(-2)
reference = inverse_sigmoid(reference)
reference = reference.view(bs, -1, self.bbox_size,self.coord_dim)
reference = reference.view(bs, -1, self.bbox_size, self.coord_dim)
tmp = self.pre_branches['reg'][level](query_feat)
tmp[...,:self.kp_coord_dim] = tmp[...,:self.kp_coord_dim] + reference[...,:self.kp_coord_dim]
tmp[..., :self.kp_coord_dim] = tmp[..., :self.kp_coord_dim] + reference[..., :self.kp_coord_dim]
lines = tmp.sigmoid() # bs, num_query, self.bbox_size,2
lines = lines * self.canvas_size[:self.coord_dim]
......@@ -295,7 +293,7 @@ class MapElementDetector(nn.Module):
return dict(
lines=lines, # [bs, num_query, bboxsize*2]
scores=ocls, # [bs, num_query, num_class]
embeddings= query_feat, # [bs, num_query, bbox_size, h]
embeddings=query_feat, # [bs, num_query, bbox_size, h]
)
@force_fp32(apply_to=('score_pred', 'lines_pred', 'gt_lines'))
......@@ -333,7 +331,7 @@ class MapElementDetector(nn.Module):
num_pred_lines = len(lines_pred)
# assigner and sampler
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred,),
assign_result = self.assigner.assign(preds=dict(lines=lines_pred, scores=score_pred, ),
gts=dict(lines=gt_lines,
labels=gt_labels, ),
gt_bboxes_ignore=gt_bboxes_ignore)
......@@ -345,10 +343,10 @@ class MapElementDetector(nn.Module):
# label targets 0: foreground, 1: background
if self.separate_detect:
labels = gt_lines.new_full((num_pred_lines, ), 1, dtype=torch.long)
labels = gt_lines.new_full((num_pred_lines,), 1, dtype=torch.long)
else:
labels = gt_lines.new_full(
(num_pred_lines, ), self.num_classes, dtype=torch.long)
(num_pred_lines,), self.num_classes, dtype=torch.long)
labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
label_weights = gt_lines.new_ones(num_pred_lines)
......@@ -421,7 +419,7 @@ class MapElementDetector(nn.Module):
(labels_list, label_weights_list,
lines_targets_list, lines_weights_list,
pos_inds_list, neg_inds_list,pos_gt_inds_list) = multi_apply(
pos_inds_list, neg_inds_list, pos_gt_inds_list) = multi_apply(
self._get_target_single,
preds['scores'], lines_pred,
class_label, bbox,
......@@ -464,7 +462,7 @@ class MapElementDetector(nn.Module):
"""
# Get target for each sample
new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list =\
new_gts, num_total_pos, num_total_neg, pos_inds_list, pos_gt_inds_list = \
self.get_targets(preds, gts, gt_bboxes_ignore_list)
# Batched all data
......@@ -499,7 +497,7 @@ class MapElementDetector(nn.Module):
# position NLL loss
if self.discrete_output:
loss_reg = -(preds['lines'].log_prob(new_gts['bboxs']) *
new_gts['bboxs_weights']).sum()/(num_total_pos)
new_gts['bboxs_weights']).sum() / (num_total_pos)
else:
loss_reg = self.reg_loss(
preds['lines'], new_gts['bboxs'], new_gts['bboxs_weights'], avg_factor=num_total_pos)
......@@ -613,7 +611,7 @@ class MapElementDetector(nn.Module):
result_dict['bbox'].append(det_preds)
result_dict['scores'].append(scores)
result_dict['labels'].append(det_labels)
result_dict['lines_bs_idx'].extend([i]*nline)
result_dict['lines_bs_idx'].extend([i] * nline)
# for down stream polyline
_bboxs = torch.cat(result_dict['bbox'], dim=0)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment