"git@developer.sourcefind.cn:change/sglang.git" did not exist on "ca9291181df4b6a27c371eb71bf740263e5eb3ef"
Unverified Commit 4c5c4299 authored by Tai-Wang, committed by GitHub

Merge branch 'master' into v1.0.0.dev0

parents b420cd1d 86cc487c
@@ -3,74 +3,74 @@
   {
    "cell_type": "code",
    "execution_count": 7,
+   "source": [
+    "from mmdet3d.apis import init_model, inference_detector, show_result_meshlab"
+   ],
+   "outputs": [],
    "metadata": {
     "pycharm": {
      "is_executing": false
     }
-   },
-   "outputs": [],
-   "source": [
-    "from mmdet3d.apis import init_detector, inference_detector, show_result_meshlab"
-   ]
+   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
-  "metadata": {
-   "pycharm": {
-    "is_executing": false
-   }
-  },
-  "outputs": [],
   "source": [
    "config_file = '../configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'\n",
    "# download the checkpoint from model zoo and put it in `checkpoints/`\n",
    "checkpoint_file = '../work_dirs/second/epoch_40.pth'"
-  ]
+  ],
+  "outputs": [],
+  "metadata": {
+   "pycharm": {
+    "is_executing": false
+   }
+  }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
+  "source": [
+   "# build the model from a config file and a checkpoint file\n",
+   "model = init_model(config_file, checkpoint_file, device='cuda:0')"
+  ],
+  "outputs": [],
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
-  },
-  "outputs": [],
-  "source": [
-   "# build the model from a config file and a checkpoint file\n",
-   "model = init_detector(config_file, checkpoint_file, device='cuda:0')"
-  ]
+  }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
-  "metadata": {
-   "pycharm": {
-    "is_executing": false
-   }
-  },
-  "outputs": [],
   "source": [
    "# test a single sample\n",
    "pcd = 'kitti_000008.bin'\n",
    "result, data = inference_detector(model, pcd)"
-  ]
- },
- {
-  "cell_type": "code",
-  "execution_count": 11,
+  ],
+  "outputs": [],
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
-  },
-  "outputs": [],
+  }
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 11,
   "source": [
    "# show the results\n",
    "out_dir = './'\n",
    "show_result_meshlab(data, result, out_dir)"
-  ]
+  ],
+  "outputs": [],
+  "metadata": {
+   "pycharm": {
+    "is_executing": false
+   }
+  }
  }
 ],
 "metadata": {
...
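Aside from nbformat reordering the cell keys, the substantive change in this notebook is the rename init_detector -> init_model. For reference, the demo flow after the merge boils down to the following sketch, assembled from the notebook cells above (it assumes the SECOND checkpoint exists at the path the notebook uses):

    from mmdet3d.apis import init_model, inference_detector, show_result_meshlab

    config_file = '../configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py'
    # assumed to exist locally, as in the notebook
    checkpoint_file = '../work_dirs/second/epoch_40.pth'

    # build the model, run inference on one point cloud, dump MeshLab files
    model = init_model(config_file, checkpoint_file, device='cuda:0')
    result, data = inference_detector(model, 'kitti_000008.bin')
    show_result_meshlab(data, result, './')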
@@ -28,7 +28,7 @@ class GatherPoints(Function):
         B, npoint = indices.size()
         _, C, N = features.size()
-        output = torch.cuda.FloatTensor(B, C, npoint)
+        output = features.new_zeros((B, C, npoint))

         gather_points_ext.gather_points_wrapper(B, C, N, npoint, features,
                                                 indices, output)
@@ -42,7 +42,7 @@ class GatherPoints(Function):
         idx, C, N = ctx.for_backwards
         B, npoint = idx.size()

-        grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
+        grad_features = grad_out.new_zeros((B, C, N))
         grad_out_data = grad_out.data.contiguous()

         gather_points_ext.gather_points_grad_wrapper(B, C, N, npoint,
                                                      grad_out_data, idx,
...
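The switch from torch.cuda.FloatTensor to new_zeros is what enables the half-precision path added later in this commit: new_zeros allocates zero-initialized memory with the same dtype and device as the source tensor, whereas torch.cuda.FloatTensor is always float32 (and uninitialized unless .zero_() is called). A minimal illustration of the difference, not the mmdet3d code itself:

    import torch

    features = torch.rand(2, 3, 8, device='cuda', dtype=torch.half)

    # inherits float16 and the CUDA device from `features`, zero-filled
    output = features.new_zeros((2, 3, 4))
    assert output.dtype == torch.half and output.device == features.device

    # torch.cuda.FloatTensor(2, 3, 4) would instead be float32, uninitialized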
 #include <ATen/cuda/CUDAContext.h>
+#include <ATen/TensorUtils.h>
 #include <THC/THC.h>
 #include <torch/extension.h>
 #include <torch/serialize/tensor.h>

 #include <vector>

 extern THCState *state;

 int gather_points_wrapper(int b, int c, int n, int npoints,
-                          at::Tensor points_tensor, at::Tensor idx_tensor,
-                          at::Tensor out_tensor);
+                          at::Tensor& points_tensor, at::Tensor& idx_tensor,
+                          at::Tensor& out_tensor);

 void gather_points_kernel_launcher(int b, int c, int n, int npoints,
-                                   const float *points, const int *idx,
-                                   float *out, cudaStream_t stream);
+                                   const at::Tensor& points_tensor,
+                                   const at::Tensor& idx_tensor,
+                                   at::Tensor& out_tensor);

 int gather_points_grad_wrapper(int b, int c, int n, int npoints,
-                               at::Tensor grad_out_tensor,
-                               at::Tensor idx_tensor,
-                               at::Tensor grad_points_tensor);
+                               at::Tensor& grad_out_tensor,
+                               at::Tensor& idx_tensor,
+                               at::Tensor& grad_points_tensor);

 void gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,
-                                        const float *grad_out, const int *idx,
-                                        float *grad_points,
-                                        cudaStream_t stream);
+                                        const at::Tensor& grad_out_tensor,
+                                        const at::Tensor& idx_tensor,
+                                        at::Tensor& grad_points_tensor);

 int gather_points_wrapper(int b, int c, int n, int npoints,
-                          at::Tensor points_tensor, at::Tensor idx_tensor,
-                          at::Tensor out_tensor) {
-  const float *points = points_tensor.data_ptr<float>();
-  const int *idx = idx_tensor.data_ptr<int>();
-  float *out = out_tensor.data_ptr<float>();
-
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  gather_points_kernel_launcher(b, c, n, npoints, points, idx, out, stream);
+                          at::Tensor& points_tensor, at::Tensor& idx_tensor,
+                          at::Tensor& out_tensor)
+{
+  gather_points_kernel_launcher(b, c, n, npoints, points_tensor, idx_tensor, out_tensor);
   return 1;
 }

 int gather_points_grad_wrapper(int b, int c, int n, int npoints,
-                               at::Tensor grad_out_tensor,
-                               at::Tensor idx_tensor,
-                               at::Tensor grad_points_tensor) {
-  const float *grad_out = grad_out_tensor.data_ptr<float>();
-  const int *idx = idx_tensor.data_ptr<int>();
-  float *grad_points = grad_points_tensor.data_ptr<float>();
-
-  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
-  gather_points_grad_kernel_launcher(b, c, n, npoints, grad_out, idx,
-                                     grad_points, stream);
+                               at::Tensor& grad_out_tensor,
+                               at::Tensor& idx_tensor,
+                               at::Tensor& grad_points_tensor)
+{
+  gather_points_grad_kernel_launcher(b, c, n, npoints, grad_out_tensor, idx_tensor,
+                                     grad_points_tensor);
   return 1;
 }

-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
+{
   m.def("gather_points_wrapper", &gather_points_wrapper,
         "gather_points_wrapper");
   m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper,
...
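After this refactor the wrappers no longer extract raw float pointers; they hand the tensors straight to the launchers, which now own the dtype dispatch and stream lookup. A hedged sketch of JIT-building such an extension for local testing with torch.utils.cpp_extension (the source file names here are assumptions, not paths from this commit):

    from torch.utils.cpp_extension import load

    # compiles the wrapper and kernel sources into an importable module
    gather_points_ext = load(
        name='gather_points_ext',
        sources=['gather_points.cpp', 'gather_points_cuda.cu'],  # assumed names
        verbose=True,
    )
    # then, as in the autograd Function above:
    # gather_points_ext.gather_points_wrapper(B, C, N, npoint, features, idx, out)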
 #include <stdio.h>
 #include <stdlib.h>

+#include <ATen/ATen.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <torch/types.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+
 #define TOTAL_THREADS 1024
 #define THREADS_PER_BLOCK 256
 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

+template <typename scalar_t>
 __global__ void gather_points_kernel(int b, int c, int n, int m,
-                                     const float *__restrict__ points,
+                                     const scalar_t *__restrict__ points,
                                      const int *__restrict__ idx,
-                                     float *__restrict__ out) {
+                                     scalar_t *__restrict__ out) {
   // points: (B, C, N)
   // idx: (B, M)
   // output:
@@ -26,8 +33,10 @@ __global__ void gather_points_kernel(int b, int c, int n, int m,
 }

 void gather_points_kernel_launcher(int b, int c, int n, int npoints,
-                                   const float *points, const int *idx,
-                                   float *out, cudaStream_t stream) {
+                                   const at::Tensor& points_tensor,
+                                   const at::Tensor& idx_tensor,
+                                   at::Tensor& out_tensor)
+{
   // points: (B, C, N)
   // idx: (B, npoints)
   // output:
@@ -35,23 +44,33 @@ void gather_points_kernel_launcher(int b, int c, int n, int npoints,
   cudaError_t err;

   dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c,
               b);  // blockIdx.x(col), blockIdx.y(row)
   dim3 threads(THREADS_PER_BLOCK);
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();

-  gather_points_kernel<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points,
-                                                       idx, out);
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      out_tensor.scalar_type(), "gather_points_kernel",
+      [&]
+      {
+        const scalar_t *points = points_tensor.data_ptr<scalar_t>();
+        const int *idx = idx_tensor.data_ptr<int>();
+        scalar_t *out = out_tensor.data_ptr<scalar_t>();
+        gather_points_kernel<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points,
+                                                             idx, out);
+      });

   err = cudaGetLastError();
-  if (cudaSuccess != err) {
+  if (cudaSuccess != err)
+  {
     fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
     exit(-1);
   }
 }

+template <typename scalar_t>
 __global__ void gather_points_grad_kernel(int b, int c, int n, int m,
-                                          const float *__restrict__ grad_out,
+                                          const scalar_t *__restrict__ grad_out,
                                           const int *__restrict__ idx,
-                                          float *__restrict__ grad_points) {
+                                          scalar_t *__restrict__ grad_points) {
   // grad_out: (B, C, M)
   // idx: (B, M)
   // output:
@@ -70,9 +89,10 @@ __global__ void gather_points_grad_kernel(int b, int c, int n, int m,
 }

 void gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,
-                                        const float *grad_out, const int *idx,
-                                        float *grad_points,
-                                        cudaStream_t stream) {
+                                        const at::Tensor& grad_out_tensor,
+                                        const at::Tensor& idx_tensor,
+                                        at::Tensor& grad_points_tensor)
+{
   // grad_out: (B, C, npoints)
   // idx: (B, npoints)
   // output:
@@ -80,14 +100,24 @@ void gather_points_grad_kernel_launcher(int b, int c, int n, int npoints,
   cudaError_t err;

   dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c,
               b);  // blockIdx.x(col), blockIdx.y(row)
   dim3 threads(THREADS_PER_BLOCK);

-  gather_points_grad_kernel<<<blocks, threads, 0, stream>>>(
-      b, c, n, npoints, grad_out, idx, grad_points);
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
+  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
+      grad_points_tensor.scalar_type(), "gather_points_grad_kernel",
+      [&]
+      {
+        const scalar_t *grad_out = grad_out_tensor.data_ptr<scalar_t>();
+        const int *idx = idx_tensor.data_ptr<int>();
+        scalar_t *grad_points = grad_points_tensor.data_ptr<scalar_t>();
+        gather_points_grad_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+            b, c, n, npoints, grad_out, idx, grad_points);
+      });

   err = cudaGetLastError();
-  if (cudaSuccess != err) {
+  if (cudaSuccess != err)
+  {
     fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
     exit(-1);
   }
...
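AT_DISPATCH_FLOATING_TYPES_AND_HALF instantiates the templated kernels for float, double, and half, picking one at runtime from the tensor's scalar type, which is how the op gains fp16 support without duplicating kernel code. For sanity-checking what the forward kernel computes, a pure-PyTorch reference can be handy (a sketch; the real op also implements the custom backward shown earlier):

    import torch

    def gather_points_ref(features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
        """Reference gather: features (B, C, N), idx (B, M) -> (B, C, M)."""
        B, C, _ = features.shape
        # expand idx across the channel dim so torch.gather picks idx[b, m]
        # from the N axis for every channel
        index = idx.long().unsqueeze(1).expand(B, C, idx.shape[1])
        return features.gather(2, index)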
@@ -2,6 +2,7 @@
 from typing import Tuple

 import torch
+from mmcv.runner import force_fp32
 from torch import nn as nn
 from torch.autograd import Function

@@ -62,7 +63,9 @@ class QueryAndGroup(nn.Module):
         if self.max_radius is None:
             assert not self.normalize_xyz, \
                 'can not normalize grouped xyz when max_radius is None'
+        self.fp16_enabled = False

+    @force_fp32()
     def forward(self, points_xyz, center_xyz, features=None):
         """forward.

@@ -143,7 +146,9 @@ class GroupAll(nn.Module):
     def __init__(self, use_xyz: bool = True):
         super().__init__()
         self.use_xyz = use_xyz
+        self.fp16_enabled = False

+    @force_fp32()
     def forward(self,
                 xyz: torch.Tensor,
                 new_xyz: torch.Tensor,
...
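@force_fp32() only takes effect once mmcv's fp16 utilities flip fp16_enabled to True; it then casts the decorated forward's half-precision tensor arguments back to float32, protecting the precision-sensitive grouping math while the rest of the model runs in fp16. A minimal illustration of the mechanism, not the mmdet3d module itself:

    import torch
    from mmcv.runner import force_fp32
    from torch import nn

    class Demo(nn.Module):
        def __init__(self):
            super().__init__()
            # mmcv's fp16 training wrappers set this to True
            self.fp16_enabled = False

        @force_fp32()
        def forward(self, x):
            # x has been cast back to float32 when fp16_enabled is True
            return x.dtype

    m = Demo()
    m.fp16_enabled = True
    print(m(torch.rand(2).half()))  # torch.float32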
@@ -2,9 +2,16 @@
 import pytest
 import torch

-from mmdet3d.ops import (ball_query, furthest_point_sample,
-                         furthest_point_sample_with_dist, gather_points,
-                         grouping_operation, knn, three_interpolate, three_nn)
+from mmdet3d.ops import (
+    ball_query,
+    furthest_point_sample,
+    furthest_point_sample_with_dist,
+    gather_points,
+    grouping_operation,
+    knn,
+    three_interpolate,
+    three_nn,
+)


 def test_fps():
@@ -236,6 +243,8 @@ def test_gather_points():
          [-0.7172, 0.0462, -0.6227, -0.7172, -0.7172, -0.7172]]]).cuda()
     assert torch.allclose(output, expected_output)

+    output_half = gather_points(features.half(), idx)
+    assert torch.allclose(output_half, expected_output.half())


 def test_three_interpolate():
...
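The added assertions exercise the new half-precision path end to end against the fp32 expectation. A standalone version of the same check, assuming gather_points accepts int32 CUDA indices as in the existing test:

    import torch
    from mmdet3d.ops import gather_points

    features = torch.rand(1, 4, 16, device='cuda')
    idx = torch.randint(0, 16, (1, 6), device='cuda', dtype=torch.int32)

    out_fp32 = gather_points(features, idx)         # (1, 4, 6), float32
    out_fp16 = gather_points(features.half(), idx)  # (1, 4, 6), float16
    assert torch.allclose(out_fp16.float(), out_fp32, atol=1e-3)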