Commit 41b18fd8 authored by zhe chen

Use pre-commit to reformat code

parent ff20ea39
from .bev import draw_annotation_bev
from .pv import draw_annotation_pv
from .utils import assign_attribute, assign_topology
\ No newline at end of file
@@ -23,8 +23,7 @@
import cv2
import numpy as np
-from .utils import THICKNESS, COLOR_DEFAULT, COLOR_DICT, interp_arc
+from .utils import COLOR_DEFAULT, COLOR_DICT, THICKNESS, interp_arc

BEV_SCALE = 10
BEV_RANGE = [-50, 50, -25, 25]
@@ -32,7 +31,7 @@ BEV_RANGE = [-50, 50, -25, 25]

def _draw_lane_centerline(image, lane_centerline, with_attribute):
    points = np.array(lane_centerline['points'])
-    points = BEV_SCALE * (-points[:, :2] + np.array([BEV_RANGE[1] , BEV_RANGE[3]]))
+    points = BEV_SCALE * (-points[:, :2] + np.array([BEV_RANGE[1], BEV_RANGE[3]]))
    points = interp_arc(points)
    if points is None:
        return
@@ -46,17 +45,19 @@ def _draw_lane_centerline(image, lane_centerline, with_attribute):
        for i in range(len(points) - 1):
            x1 = int(points[i][0] + idx * THICKNESS * 1.5)
            y1 = int(points[i][1] + idx * THICKNESS * 1.5)
-            x2 = int(points[i+1][0] + idx * THICKNESS * 1.5)
-            y2 = int(points[i+1][1] + idx * THICKNESS * 1.5)
+            x2 = int(points[i + 1][0] + idx * THICKNESS * 1.5)
+            y2 = int(points[i + 1][1] + idx * THICKNESS * 1.5)
            cv2.line(image, pt1=(y1, x1), pt2=(y2, x2), color=color, thickness=THICKNESS, lineType=cv2.LINE_AA)


def _draw_vertex(image, lane_centerline):
-    points = BEV_SCALE * (-np.array(lane_centerline['points'])[:, :2] + np.array([BEV_RANGE[1] , BEV_RANGE[3]]))
+    points = BEV_SCALE * (-np.array(lane_centerline['points'])[:, :2] + np.array([BEV_RANGE[1], BEV_RANGE[3]]))
    cv2.circle(image, (int(points[0, 1]), int(points[0, 0])), int(THICKNESS * 1.5), COLOR_DEFAULT, -1)
    cv2.circle(image, (int(points[-1, 1]), int(points[-1, 0])), int(THICKNESS * 1.5), COLOR_DEFAULT, -1)


def draw_annotation_bev(annotation, with_attribute):
    image = np.ones((
        BEV_SCALE * (BEV_RANGE[1] - BEV_RANGE[0]),
......
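For readers skimming the diff: the BEV drawing above hinges on a single coordinate transform, `BEV_SCALE * (-points[:, :2] + [BEV_RANGE[1], BEV_RANGE[3]])`, which the reformatting touches twice. A minimal standalone sketch of that mapping (the helper name `ego_to_bev_pixels` is ours, not the repo's):

```python
import numpy as np

BEV_SCALE = 10
BEV_RANGE = [-50, 50, -25, 25]  # metres: [x_min, x_max, y_min, y_max] in the ego frame

def ego_to_bev_pixels(points_xyz):
    """Map (N, 3) ego-frame points to (N, 2) BEV pixel coordinates."""
    points = np.asarray(points_xyz)[:, :2]
    # Negate so forward/left in the ego frame grows toward the image origin,
    # then shift by the range maxima so every coordinate is non-negative.
    return BEV_SCALE * (-points + np.array([BEV_RANGE[1], BEV_RANGE[3]]))

print(ego_to_bev_pixels(np.zeros((1, 3))))  # ego origin -> image centre [[500., 250.]]
```

Note the drawing code then passes `(y, x)` rather than `(x, y)` to `cv2.line`, since row/column order is swapped relative to OpenCV's point convention.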
@@ -23,7 +23,7 @@
import cv2
import numpy as np
-from .utils import THICKNESS, COLOR_DEFAULT, COLOR_DICT, interp_arc
+from .utils import COLOR_DEFAULT, COLOR_DICT, THICKNESS, interp_arc


def _draw_traffic_element(image, traffic_element):
@@ -40,6 +40,7 @@ def _draw_traffic_element(image, traffic_element):
    cv2.rectangle(image, top_left, bottom_right, color=color, thickness=THICKNESS, lineType=cv2.LINE_AA)


def _project(points, intrinsic, extrinsic):
    if points is None:
        return points
@@ -57,6 +58,7 @@ def _project(points, intrinsic, extrinsic):
    return points_on_image_cor


def _draw_lane_centerline(image, lane_centerline, intrinsic, extrinsic, with_attribute):
    points = _project(interp_arc(lane_centerline['points']), intrinsic, extrinsic)
    if points is None:
@@ -71,14 +73,15 @@ def _draw_lane_centerline(image, lane_centerline, intrinsic, extrinsic, with_att
        for i in range(len(points) - 1):
            x1 = int(points[i][0] + idx * THICKNESS * 1.5)
            y1 = int(points[i][1] + idx * THICKNESS * 1.5)
-            x2 = int(points[i+1][0] + idx * THICKNESS * 1.5)
-            y2 = int(points[i+1][1] + idx * THICKNESS * 1.5)
+            x2 = int(points[i + 1][0] + idx * THICKNESS * 1.5)
+            y2 = int(points[i + 1][1] + idx * THICKNESS * 1.5)
            try:
                cv2.line(image, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=THICKNESS, lineType=cv2.LINE_AA)
            except Exception:
                return


def _draw_topology(image, topology, intrinsic, extrinsic):
    coord_from = [
        (topology['traffic_element'][0][0] + topology['traffic_element'][0][0]) / 2,
@@ -96,10 +99,11 @@ def _draw_topology(image, topology, intrinsic, extrinsic):
    curve = np.array([coord_from, mid, coord_to])
    pts_fit = np.polyfit(curve[:, 0], curve[:, 1], 2)
    xs = np.linspace(curve[0][0], curve[-1][0], 1000)
-    ys = pts_fit[0] * xs**2 + pts_fit[1] * xs + pts_fit[2]
+    ys = pts_fit[0] * xs ** 2 + pts_fit[1] * xs + pts_fit[2]
    curve = np.int_([np.array([np.transpose(np.vstack([xs, ys]))])])
-    cv2.polylines(image, curve, isClosed=False, color=color, thickness=THICKNESS//3, lineType=cv2.LINE_AA)
+    cv2.polylines(image, curve, isClosed=False, color=color, thickness=THICKNESS // 3, lineType=cv2.LINE_AA)


def draw_annotation_pv(camera, image, annotation, intrinsic, extrinsic, with_attribute, with_topology):
    for lane_centerline in annotation['lane_centerline']:
......
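The `_draw_topology` hunk above fits a quadratic through three anchor points with `np.polyfit` and rasterises it. A self-contained sketch of the same idea (the three anchor points are made-up values):

```python
import cv2
import numpy as np

image = np.zeros((500, 500, 3), dtype=np.uint8)
start, mid, end = (50.0, 400.0), (250.0, 100.0), (450.0, 400.0)  # hypothetical anchors

curve = np.array([start, mid, end])
coeffs = np.polyfit(curve[:, 0], curve[:, 1], 2)   # y = a*x^2 + b*x + c, exact for 3 points
xs = np.linspace(curve[0][0], curve[-1][0], 1000)
ys = coeffs[0] * xs ** 2 + coeffs[1] * xs + coeffs[2]

pts = np.stack([xs, ys], axis=1).astype(np.int32)  # (1000, 2) polyline
cv2.polylines(image, [pts], isClosed=False, color=(0, 255, 0), thickness=1, lineType=cv2.LINE_AA)
```

Since the fit treats y as a function of x, the construction assumes the two endpoints differ in x; a near-vertical connection makes the polyfit ill-conditioned.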
@@ -22,7 +22,6 @@
import numpy as np

THICKNESS = 4
COLOR_DEFAULT = (0, 0, 255)
@@ -109,6 +108,7 @@ def interp_arc(points, t=1000):
    return points_interp


def assign_attribute(annotation):
    topology_lcte = np.array(annotation['topology_lcte'], dtype=bool)
    for i in range(len(annotation['lane_centerline'])):
@@ -116,6 +116,7 @@ def assign_attribute(annotation):
            set([ts['attribute'] for j, ts in enumerate(annotation['traffic_element']) if topology_lcte[i][j]])
    return annotation


def assign_topology(annotation):
    topology_lcte = np.array(annotation['topology_lcte'], dtype=bool)
    annotation['topology'] = []
......
from .core import *
from .datasets import *
from .models import *
\ No newline at end of file
from .assigners import *
from .match_costs import *
\ No newline at end of file
@@ -21,10 +21,9 @@
# ==============================================================================
import torch
-from scipy.optimize import linear_sum_assignment
+from mmdet.core.bbox.assigners import AssignResult, HungarianAssigner
from mmdet.core.bbox.builder import BBOX_ASSIGNERS
-from mmdet.core.bbox.assigners import HungarianAssigner, AssignResult
+from scipy.optimize import linear_sum_assignment


@BBOX_ASSIGNERS.register_module()
@@ -43,10 +42,10 @@ class LaneHungarianAssigner(HungarianAssigner):
        num_gts, num_lanes = gt_lanes.size(0), lane_pred.size(0)

        # 1. assign -1 by default
-        assigned_gt_inds = lane_pred.new_full((num_lanes, ),
+        assigned_gt_inds = lane_pred.new_full((num_lanes,),
                                              -1,
                                              dtype=torch.long)
-        assigned_labels = lane_pred.new_full((num_lanes, ),
+        assigned_labels = lane_pred.new_full((num_lanes,),
                                             -1,
                                             dtype=torch.long)
        if num_gts == 0 or num_lanes == 0:
......
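Context for this hunk: `LaneHungarianAssigner` follows mmdet's `HungarianAssigner` pattern — build a prediction-to-ground-truth cost matrix, solve it with `scipy.optimize.linear_sum_assignment`, and store 1-based matched GT indices (0 is reserved for negatives). A stripped-down sketch, assuming a plain L1 cost over flattened lane points rather than the repo's configured cost:

```python
import torch
from scipy.optimize import linear_sum_assignment

lane_pred = torch.rand(5, 33)   # 5 predicted lanes; e.g. 11 points x 3 coords, flattened
gt_lanes = torch.rand(2, 33)    # 2 ground-truth lanes

cost = torch.cdist(lane_pred, gt_lanes, p=1)             # (num_preds, num_gts) L1 cost
rows, cols = linear_sum_assignment(cost.cpu().numpy())   # optimal one-to-one matching

assigned_gt_inds = lane_pred.new_full((lane_pred.size(0),), 0, dtype=torch.long)
assigned_gt_inds[torch.from_numpy(rows)] = torch.from_numpy(cols) + 1  # 1-based GT index
```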
@@ -21,7 +21,6 @@
# ==============================================================================
import torch
from mmdet.core.bbox.match_costs.builder import MATCH_COST
@@ -33,6 +32,7 @@ class LaneL1Cost:
    Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/core/bbox/match_costs/match_cost.py#L11.
    """

    def __init__(self, weight=1.):
        self.weight = weight
......
from .pipelines import *
from .openlane_v2_dataset import *
\ No newline at end of file
@@ -21,21 +21,19 @@
# ==============================================================================
import os
-import cv2
-import torch
-import numpy as np
from math import factorial
-from pyquaternion import Quaternion
+import cv2
import mmcv
-from mmdet.datasets import DATASETS
+import numpy as np
+import torch
from mmdet3d.datasets import Custom3DDataset
+from mmdet.datasets import DATASETS
from openlanev2.dataset import Collection
from openlanev2.evaluation import evaluate as openlanev2_evaluate
from openlanev2.preprocessing import check_results
from openlanev2.visualization.utils import COLOR_DICT
+from pyquaternion import Quaternion

COLOR_GT = (0, 255, 0)
COLOR_GT_TOPOLOGY = (0, 127, 0)
@@ -45,12 +43,11 @@ COLOR_DICT = {k: (v[2], v[1], v[0]) for k, v in COLOR_DICT.items()}


def render_pv(images, lidar2imgs, gt_lc, pred_lc, gt_te, gt_te_attr, pred_te, pred_te_attr):
    results = []
    for idx, (image, lidar2img) in enumerate(zip(images, lidar2imgs)):
-        if gt_lc is not None :
+        if gt_lc is not None:
            for lc in gt_lc:
                xyz1 = np.concatenate([lc, np.ones((lc.shape[0], 1))], axis=1)
                xyz1 = xyz1 @ lidar2img.T
@@ -90,10 +87,10 @@ def render_pv(images, lidar2imgs, gt_lc, pred_lc, gt_te, gt_te_attr, pred_te, pr
    return results


def render_corner_rectangle(img, pt1, pt2, color,
                            corner_thickness=3, edge_thickness=2,
                            centre_cross=False, lineType=cv2.LINE_8):
    corner_length = min(abs(pt1[0] - pt2[0]), abs(pt1[1] - pt2[1])) // 4
    e_args = [color, edge_thickness, lineType]
    c_args = [color, corner_thickness, lineType]
@@ -121,8 +118,8 @@ def render_corner_rectangle(img, pt1, pt2, color,
    return img


def render_front_view(image, lidar2img, gt_lc, pred_lc, gt_te, pred_te, gt_topology_lcte, pred_topology_lcte):
    if gt_topology_lcte is not None:
        for lc_idx, lcte in enumerate(gt_topology_lcte):
            for te_idx, connected in enumerate(lcte):
@@ -137,7 +134,7 @@ def render_front_view(image, lidar2img, gt_lc, pred_lc, gt_te, pred_te, gt_topol
                p1 = (xyz1[:, :2] / xyz1[:, 2:3])[0].astype(int)
                te = gt_te[te_idx]
-                p2 = np.array([(te[0]+te[2])/2, te[3]]).astype(int)
+                p2 = np.array([(te[0] + te[2]) / 2, te[3]]).astype(int)
                image = cv2.arrowedLine(image, (p2[0], p2[1]), (p1[0], p1[1]), COLOR_GT_TOPOLOGY, tipLength=0.03)
@@ -155,20 +152,22 @@ def render_front_view(image, lidar2img, gt_lc, pred_lc, gt_te, pred_te, gt_topol
                p1 = (xyz1[:, :2] / xyz1[:, 2:3])[0].astype(int)
                te = pred_te[te_idx]
-                p2 = np.array([(te[0]+te[2])/2, te[3]]).astype(int)
+                p2 = np.array([(te[0] + te[2]) / 2, te[3]]).astype(int)
                image = cv2.arrowedLine(image, (p2[0], p2[1]), (p1[0], p1[1]), COLOR_PRED_TOPOLOGY, tipLength=0.03)

    return image


-def render_bev(gt_lc=None, pred_lc=None, gt_topology_lclc=None, pred_topology_lclc=None, map_size=[-52, 52, -27, 27], scale=20):
-    image = np.zeros((int(scale*(map_size[1]-map_size[0])), int(scale*(map_size[3] - map_size[2])), 3), dtype=np.uint8)
+def render_bev(gt_lc=None, pred_lc=None, gt_topology_lclc=None, pred_topology_lclc=None, map_size=[-52, 52, -27, 27],
+               scale=20):
+    image = np.zeros((int(scale * (map_size[1] - map_size[0])), int(scale * (map_size[3] - map_size[2])), 3),
+                     dtype=np.uint8)

    if gt_lc is not None:
        for lc in gt_lc:
            draw_coor = (scale * (-lc[:, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
-            image = cv2.polylines(image, [draw_coor[:, [1,0]]], False, COLOR_GT, max(round(scale * 0.2), 1))
+            image = cv2.polylines(image, [draw_coor[:, [1, 0]]], False, COLOR_GT, max(round(scale * 0.2), 1))
            image = cv2.circle(image, (draw_coor[0, 1], draw_coor[0, 0]), max(round(scale * 0.5), 3), COLOR_GT, -1)
            image = cv2.circle(image, (draw_coor[-1, 1], draw_coor[-1, 0]), max(round(scale * 0.5), 3), COLOR_GT, -1)
@@ -182,12 +181,13 @@ def render_bev(gt_lc=None, pred_lc=None, gt_topology_lclc=None, pred_topology_lc
                l2_mid = len(l2) // 2
                p1 = (scale * (-l1[l1_mid, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
                p2 = (scale * (-l2[l2_mid, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
-                image = cv2.arrowedLine(image, (p1[1], p1[0]), (p2[1], p2[0]), COLOR_GT_TOPOLOGY, max(round(scale * 0.1), 1), tipLength=0.03)
+                image = cv2.arrowedLine(image, (p1[1], p1[0]), (p2[1], p2[0]), COLOR_GT_TOPOLOGY,
+                                        max(round(scale * 0.1), 1), tipLength=0.03)

    if pred_lc is not None:
        for lc in pred_lc:
            draw_coor = (scale * (-lc[:, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
-            image = cv2.polylines(image, [draw_coor[:, [1,0]]], False, COLOR_PRED, max(round(scale * 0.2), 1))
+            image = cv2.polylines(image, [draw_coor[:, [1, 0]]], False, COLOR_PRED, max(round(scale * 0.2), 1))
            image = cv2.circle(image, (draw_coor[0, 1], draw_coor[0, 0]), max(round(scale * 0.5), 3), COLOR_PRED, -1)
            image = cv2.circle(image, (draw_coor[-1, 1], draw_coor[-1, 0]), max(round(scale * 0.5), 3), COLOR_PRED, -1)
@@ -201,13 +201,14 @@ def render_bev(gt_lc=None, pred_lc=None, gt_topology_lclc=None, pred_topology_lc
                l2_mid = len(l2) // 2
                p1 = (scale * (-l1[l1_mid, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
                p2 = (scale * (-l2[l2_mid, :2] + np.array([map_size[1], map_size[3]]))).astype(np.int)
-                image = cv2.arrowedLine(image, (p1[1], p1[0]), (p2[1], p2[0]), COLOR_PRED_TOPOLOGY, max(round(scale * 0.1), 1), tipLength=0.03)
+                image = cv2.arrowedLine(image, (p1[1], p1[0]), (p2[1], p2[0]), COLOR_PRED_TOPOLOGY,
+                                        max(round(scale * 0.1), 1), tipLength=0.03)

    return image


@DATASETS.register_module()
class OpenLaneV2SubsetADataset(Custom3DDataset):
    CLASSES = [None]

    def __init__(self,
@@ -227,7 +228,8 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
    def load_annotations(self, ann_file):
        ann_file = ann_file.name.split('.pkl')[0].split('/')
-        self.collection = Collection(data_root=self.data_root, meta_root='/'.join(ann_file[:-1]), collection=ann_file[-1])
+        self.collection = Collection(data_root=self.data_root, meta_root='/'.join(ann_file[:-1]),
+                                     collection=ann_file[-1])
        return self.collection.keys

    def get_data_info(self, index):
@@ -243,7 +245,6 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
        trans = []
        cam2imgs = []
        for i, camera in enumerate(frame.get_camera_list()):
            assert camera == 'ring_front_center' if i == 0 else True, \
                'the first image should be the front view'
@@ -299,10 +300,12 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
        frame = self.collection.get_frame_via_identifier((split, segment_id, timestamp))

        gt_lc = np.array([lc['points'] for lc in frame.get_annotations_lane_centerlines()], dtype=np.float32)
-        gt_lc_labels = np.zeros((len(gt_lc), ), dtype=np.int64)
+        gt_lc_labels = np.zeros((len(gt_lc),), dtype=np.int64)

-        gt_te = np.array([element['points'].flatten() for element in frame.get_annotations_traffic_elements()], dtype=np.float32).reshape(-1, 4)
-        gt_te_labels = np.array([element['attribute']for element in frame.get_annotations_traffic_elements()], dtype=np.int64)
+        gt_te = np.array([element['points'].flatten() for element in frame.get_annotations_traffic_elements()],
+                         dtype=np.float32).reshape(-1, 4)
+        gt_te_labels = np.array([element['attribute'] for element in frame.get_annotations_traffic_elements()],
+                                dtype=np.int64)

        gt_topology_lclc = frame.get_annotations_topology_lclc()
        gt_topology_lcte = frame.get_annotations_topology_lcte()
@@ -346,7 +349,7 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
        if dump:
            assert dump_dir is not None
-            assert check_results(pred_dict), "Please fill the missing keys."
+            assert check_results(pred_dict), 'Please fill the missing keys.'
            output_path = os.path.join(dump_dir, 'result.pkl')
            mmcv.dump(pred_dict, output_path)
@@ -390,6 +393,7 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
        def comb(n, k):
            return factorial(n) // (factorial(k) * factorial(n - k))

        n_points = 11
        n_control = lanes.shape[1]
        A = np.zeros((n_points, n_control))
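The nested `comb()` helper in this hunk exists to build a Bernstein basis matrix `A`, so that sampled lane points equal `A @ control_points`. A short sketch of what `A` does for 11 samples and 4 control points (the control-point values are invented for illustration):

```python
import numpy as np
from math import factorial

def comb(n, k):
    return factorial(n) // (factorial(k) * factorial(n - k))

n_points, n_control = 11, 4
t = np.linspace(0.0, 1.0, n_points)
A = np.zeros((n_points, n_control))
for i in range(n_points):
    for j in range(n_control):
        # Bernstein polynomial B_{j, n_control-1} evaluated at t_i
        A[i, j] = comb(n_control - 1, j) * (1 - t[i]) ** (n_control - 1 - j) * t[i] ** j

control_points = np.array([[0., 0., 0.], [10., 2., 0.], [20., -2., 0.], [30., 0., 0.]])
lane = A @ control_points  # (11, 3) points sampled along the cubic Bezier
```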
@@ -480,8 +484,10 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
            if frame.get_annotations():
                gt_lc = np.array([lc['points'] for lc in frame.get_annotations_lane_centerlines()])
-                gt_te = np.array([element['points'].flatten() for element in frame.get_annotations_traffic_elements()]).reshape(-1, 4)
-                gt_te_attr = np.array([element['attribute']for element in frame.get_annotations_traffic_elements()])
+                gt_te = np.array(
+                    [element['points'].flatten() for element in frame.get_annotations_traffic_elements()]).reshape(-1,
+                                                                                                                    4)
+                gt_te_attr = np.array([element['attribute'] for element in frame.get_annotations_traffic_elements()])
                gt_topology_lclc = frame.get_annotations_topology_lclc()
                gt_topology_lcte = frame.get_annotations_topology_lcte()
@@ -497,7 +503,8 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
                    gt_te=gt_te, gt_te_attr=gt_te_attr, pred_te=pred_te, pred_te_attr=pred_te_attr,
                )
                for cam_idx, image in enumerate(images):
-                    output_path = os.path.join(visualization_dir, f'{"/".join(key)}/pv_{frame.get_camera_list()[cam_idx]}.jpg')
+                    output_path = os.path.join(visualization_dir,
+                                               f'{"/".join(key)}/pv_{frame.get_camera_list()[cam_idx]}.jpg')
                    mmcv.imwrite(image, output_path)

                img_pts = [
@@ -524,7 +531,8 @@ class OpenLaneV2SubsetADataset(Custom3DDataset):
                    gt_topology_lcte=gt_topology_lcte,
                    pred_topology_lcte=pred_topology_lcte,
                )
-                output_path = os.path.join(visualization_dir, f'{"/".join(key)}/pv_{frame.get_camera_list()[0]}_topology.jpg')
+                output_path = os.path.join(visualization_dir,
+                                           f'{"/".join(key)}/pv_{frame.get_camera_list()[0]}_topology.jpg')
                mmcv.imwrite(front_view, output_path)

                # render bev
......
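Several hunks in this file reformat the same projection idiom: append a homogeneous 1, multiply by the 4x4 lidar2img matrix, and divide by depth. A compact standalone version (the helper name is ours, not the repo's):

```python
import numpy as np

def project_to_image(points_xyz, lidar2img):
    """(N, 3) lidar-frame points -> (M, 2) pixel coordinates, M <= N."""
    xyz1 = np.concatenate([points_xyz, np.ones((points_xyz.shape[0], 1))], axis=1)
    xyz1 = xyz1 @ lidar2img.T          # project into the camera's image space
    xyz1 = xyz1[xyz1[:, 2] > 1e-5]     # drop points behind (or on) the image plane
    return xyz1[:, :2] / xyz1[:, 2:3]  # perspective divide
```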
from .formating import *
from .loading import *
from .transforms import *
\ No newline at end of file
@@ -21,7 +21,6 @@
# ==============================================================================
import numpy as np
from mmcv.parallel import DataContainer as DC
from mmdet.datasets import PIPELINES
from mmdet.datasets.pipelines import to_tensor
......
@@ -20,11 +20,10 @@
# limitations under the License.
# ==============================================================================
-import numpy as np
import mmcv
-from mmdet.datasets import PIPELINES
+import numpy as np
from mmdet3d.datasets.pipelines import LoadMultiViewImageFromFiles
+from mmdet.datasets import PIPELINES


@PIPELINES.register_module()
......
@@ -20,12 +20,12 @@
# limitations under the License.
# ==============================================================================
-import numpy as np
-from numpy import random
from math import factorial

import mmcv
+import numpy as np
from mmdet.datasets import PIPELINES
+from numpy import random


@PIPELINES.register_module()
@@ -38,7 +38,7 @@ class ResizeFrontView:
        assert 'ring_front_center' in results['img_paths'][0], \
            'the first image should be the front view'

-        #image
+        # image
        front_view = results['img'][0]
        h, w, _ = front_view.shape
        resiezed_front_view, w_scale, h_scale = mmcv.imresize(
@@ -83,6 +83,7 @@ class ResizeFrontView:
        return results


@PIPELINES.register_module()
class NormalizeMultiviewImage:
    r"""
@@ -104,7 +105,6 @@ class NormalizeMultiviewImage:
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def __call__(self, results):
        """Call function to normalize images.

        Args:
@@ -124,6 +124,7 @@ class NormalizeMultiviewImage:
        repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'
        return repr_str


@PIPELINES.register_module()
class PhotoMetricDistortionMultiViewImage:
    r"""
@@ -170,7 +171,7 @@ class PhotoMetricDistortionMultiViewImage:
        new_imgs = []
        for img in imgs:
            assert img.dtype == np.float32, \
-                'PhotoMetricDistortion needs the input image of dtype np.float32,'\
+                'PhotoMetricDistortion needs the input image of dtype np.float32,' \
                ' please set "to_float32=True" in "LoadImageFromFile" pipeline'
            # random brightness
            if random.randint(2):
@@ -228,6 +229,7 @@ class PhotoMetricDistortionMultiViewImage:
        repr_str += f'hue_delta={self.hue_delta})'
        return repr_str


@PIPELINES.register_module()
class CustomPadMultiViewImage:
@@ -256,14 +258,15 @@ class CustomPadMultiViewImage:
        repr_str += f'pad_val={self.pad_val})'
        return repr_str


@PIPELINES.register_module()
class CustomParameterizeLane:

    def __init__(self, method, method_para):
        method_list = ['bezier', 'polygon', 'bezier_Direction_attribute', 'bezier_Endpointfixed']
        self.method = method
-        if not self.method in method_list:
-            raise Exception("Not implemented!")
+        if self.method not in method_list:
+            raise Exception('Not implemented!')
        self.method_para = method_para

    def __call__(self, results):
@@ -294,7 +297,8 @@ class CustomParameterizeLane:
                A[i, j] = self.comb(n_control - 1, j) * np.power(1 - t[i], n_control - 1 - j) * np.power(t[i], j)
        A_BE = A[1:-1, 1:-1]
        _points = points[1:-1]
-        _points = _points - A[1:-1, 0].reshape(-1, 1) @ points[0].reshape(1, -1) - A[1:-1, -1].reshape(-1, 1) @ points[-1].reshape(1, -1)
+        _points = _points - A[1:-1, 0].reshape(-1, 1) @ points[0].reshape(1, -1) - A[1:-1, -1].reshape(-1, 1) @ points[
+            -1].reshape(1, -1)
        conts = np.linalg.lstsq(A_BE, _points, rcond=None)
@@ -369,7 +373,7 @@ class CustomParameterizeLane:
            sorted_y = np.array(centerline[:, 0])
        points = np.array(list(zip(sorted_x, sorted_y)))
        if key_rep not in ['Bounding Box', 'SME', 'Extreme Points']:
-            raise Exception(f"{key_rep} not existed!")
+            raise Exception(f'{key_rep} not existed!')
        elif key_rep == 'Bounding Box':
            res = np.array(
                [points[:, 0].min(), points[:, 1].min(), points[:, 0].max(), points[:, 1].max()]).reshape((2, 2))
......
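The wrapped line in the `@@ -294,7 +297,8 @@` hunk above is the heart of the 'bezier_Endpointfixed' fit: keep the first and last samples as fixed control points, subtract their Bernstein contribution from the interior samples, and solve for the remaining control points with least squares. A runnable sketch under that reading (the sample data is invented):

```python
import numpy as np
from math import comb  # Python 3.8+

n_points, n_control = 11, 4
t = np.linspace(0.0, 1.0, n_points)
A = np.array([[comb(n_control - 1, j) * (1 - ti) ** (n_control - 1 - j) * ti ** j
               for j in range(n_control)] for ti in t])          # Bernstein basis

# Fake lane samples: x marches forward, y wiggles.
points = np.stack([np.linspace(0., 30., n_points),
                   np.sin(np.linspace(0., 3., n_points))], axis=1)

A_BE = A[1:-1, 1:-1]                 # basis columns for the interior controls
rhs = points[1:-1] \
    - A[1:-1, 0].reshape(-1, 1) @ points[0].reshape(1, -1) \
    - A[1:-1, -1].reshape(-1, 1) @ points[-1].reshape(1, -1)
interior, *_ = np.linalg.lstsq(A_BE, rhs, rcond=None)
controls = np.concatenate([points[:1], interior, points[-1:]])   # (4, 2) control points
```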
from .detectors import *
from .heads import *
from .necks import *
from .modules import *
from .backbones import *
@@ -4,16 +4,17 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
-from collections import OrderedDict
import torch
import torch.nn as nn
+from collections import OrderedDict
-import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
-from timm.models.layers import trunc_normal_, DropPath
-from mmcv.runner import _load_checkpoint
from mmcv.cnn import constant_init, trunc_normal_init
-from mmdet.utils import get_root_logger
+from mmcv.runner import _load_checkpoint
from mmdet.models.builder import BACKBONES
+import torch.nn.functional as F
+from mmdet.utils import get_root_logger
+from timm.models.layers import DropPath, trunc_normal_

from .ops_dcnv3 import modules as opsm
@@ -187,7 +188,7 @@ class AttentiveBlock(nn.Module):
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
-                 norm_layer="LN",
+                 norm_layer='LN',
                 attn_head_dim=None,
                 out_dim=None):
        super().__init__()
@@ -577,7 +578,7 @@ class InternImage(nn.Module):
        self.num_levels = len(depths)
        self.depths = depths
        self.channels = channels
-        self.num_features = int(channels * 2**(self.num_levels - 1))
+        self.num_features = int(channels * 2 ** (self.num_levels - 1))
        self.post_norm = post_norm
        self.mlp_ratio = mlp_ratio
        self.init_cfg = init_cfg
@@ -588,9 +589,9 @@ class InternImage(nn.Module):
        logger.info(f'using activation layer: {act_layer}')
        logger.info(f'using main norm layer: {norm_layer}')
        logger.info(f'using dpr: {drop_path_type}, {drop_path_rate}')
-        logger.info(f"level2_post_norm: {level2_post_norm}")
-        logger.info(f"level2_post_norm_block_ids: {level2_post_norm_block_ids}")
-        logger.info(f"res_post_norm: {res_post_norm}")
+        logger.info(f'level2_post_norm: {level2_post_norm}')
+        logger.info(f'level2_post_norm_block_ids: {level2_post_norm_block_ids}')
+        logger.info(f'res_post_norm: {res_post_norm}')

        in_chans = 3
        self.patch_embed = StemLayer(in_chans=in_chans,
@@ -612,7 +613,7 @@ class InternImage(nn.Module):
                i == 2) else None # for InternImage-H/G
            level = InternImageBlock(
                core_op=getattr(opsm, core_op),
-                channels=int(channels * 2**i),
+                channels=int(channels * 2 ** i),
                depth=depths[i],
                groups=groups[i],
                mlp_ratio=self.mlp_ratio,
......
@@ -3,5 +3,3 @@
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from .dcnv3_func import DCNv3Function, dcnv3_core_pytorch
@@ -4,16 +4,14 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
+from __future__ import absolute_import, division, print_function

+import DCNv3
import torch
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.cuda.amp import custom_bwd, custom_fwd
-import DCNv3


class DCNv3Function(Function):
@@ -88,7 +86,9 @@ class DCNv3Function(Function):
            im2col_step_i=int(im2col_step),
        )


-def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0, stride_h=1, stride_w=1):
+def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h=0, pad_w=0,
+                          stride_h=1, stride_w=1):
    _, H_, W_, _ = spatial_shapes
    H_out = (H_ - (dilation_h * (kernel_h - 1) + 1)) // stride_h + 1
    W_out = (W_ - (dilation_w * (kernel_w - 1) + 1)) // stride_w + 1
@@ -137,7 +137,7 @@ def _generate_dilation_grids(spatial_shapes, kernel_h, kernel_w, dilation_h, dil
            device=device))
    points_list.extend([x / W_, y / H_])

-    grid = torch.stack(points_list, -1).reshape(-1, 1, 2).\
+    grid = torch.stack(points_list, -1).reshape(-1, 1, 2). \
        repeat(1, group, 1).permute(1, 0, 2)
    grid = grid.reshape(1, 1, 1, group * kernel_h * kernel_w, 2)
@@ -161,8 +161,8 @@ def dcnv3_core_pytorch(
        input.shape, input.device, kernel_h, kernel_w, dilation_h, dilation_w, pad_h, pad_w, stride_h, stride_w)
    grid = _generate_dilation_grids(
        input.shape, kernel_h, kernel_w, dilation_h, dilation_w, group, input.device)
-    spatial_norm = torch.tensor([W_in, H_in]).reshape(1, 1, 1, 2).\
-        repeat(1, 1, 1, group*kernel_h*kernel_w).to(input.device)
+    spatial_norm = torch.tensor([W_in, H_in]).reshape(1, 1, 1, 2). \
+        repeat(1, 1, 1, group * kernel_h * kernel_w).to(input.device)

    sampling_locations = (ref + grid * offset_scale).repeat(N_, 1, 1, 1, 1).flatten(3, 4) + \
        offset * offset_scale / spatial_norm
@@ -170,19 +170,19 @@ def dcnv3_core_pytorch(
    P_ = kernel_h * kernel_w
    sampling_grids = 2 * sampling_locations - 1
    # N_, H_in, W_in, group*group_channels -> N_, H_in*W_in, group*group_channels -> N_, group*group_channels, H_in*W_in -> N_*group, group_channels, H_in, W_in
-    input_ = input.view(N_, H_in*W_in, group*group_channels).transpose(1, 2).\
-        reshape(N_*group, group_channels, H_in, W_in)
+    input_ = input.view(N_, H_in * W_in, group * group_channels).transpose(1, 2). \
+        reshape(N_ * group, group_channels, H_in, W_in)
    # N_, H_out, W_out, group*P_*2 -> N_, H_out*W_out, group, P_, 2 -> N_, group, H_out*W_out, P_, 2 -> N_*group, H_out*W_out, P_, 2
-    sampling_grid_ = sampling_grids.view(N_, H_out*W_out, group, P_, 2).transpose(1, 2).\
+    sampling_grid_ = sampling_grids.view(N_, H_out * W_out, group, P_, 2).transpose(1, 2). \
        flatten(0, 1)
    # N_*group, group_channels, H_out*W_out, P_
    sampling_input_ = F.grid_sample(
        input_, sampling_grid_, mode='bilinear', padding_mode='zeros', align_corners=False)
    # (N_, H_out, W_out, group*P_) -> N_, H_out*W_out, group, P_ -> (N_, group, H_out*W_out, P_) -> (N_*group, 1, H_out*W_out, P_)
-    mask = mask.view(N_, H_out*W_out, group, P_).transpose(1, 2).\
-        reshape(N_*group, 1, H_out*W_out, P_)
+    mask = mask.view(N_, H_out * W_out, group, P_).transpose(1, 2). \
+        reshape(N_ * group, 1, H_out * W_out, P_)
    output = (sampling_input_ * mask).sum(-1).view(N_,
-                                                   group*group_channels, H_out*W_out)
+                                                   group * group_channels, H_out * W_out)

    return output.transpose(1, 2).reshape(N_, H_out, W_out, -1).contiguous()
return output.transpose(1, 2).reshape(N_, H_out, W_out, -1).contiguous()
@@ -3,5 +3,3 @@
# Copyright (c) 2022 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from .dcnv3 import DCNv3, DCNv3_pytorch
\ No newline at end of file