OpenDAS / MMCV · Commits · a4dc2a72

Unverified commit a4dc2a72, authored Dec 24, 2021 by pc, committed by GitHub on Dec 24, 2021
support device dispatch in parrots (#1588)
parent 0bcbeadb

46 changed files in the commit. This page (1 of 3) shows 6 changed files, with 143 additions and 293 deletions (+143, -293):
mmcv/ops/csrc/parrots/three_nn.cpp          +7    -19
mmcv/ops/csrc/parrots/tin_shift.cpp         +7    -39
mmcv/ops/csrc/parrots/upfirdn2d.cpp         +111  -19
mmcv/ops/csrc/parrots/voxelization.cpp      +15   -62
mmcv/ops/csrc/parrots/voxelization_cpu.cpp  +0    -152
setup.py                                    +3    -2
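Every C++ diff on this page makes the same move: the per-file #ifdef MMCV_WITH_CUDA declarations and device().is_cuda() branches are deleted, and each binding now calls a generic *_impl function whose body is a single DISPATCH_DEVICE_IMPL(...) from the new pytorch_device_registry.hpp include. The sketch below shows, in self-contained form, how such a registry-based dispatch can work. It is an illustration only: the enum, class, and function names are stand-ins, and mmcv's real macros differ in detail.

// Minimal sketch of a device-dispatch registry, for orientation only.
// The real macros come from mmcv's pytorch_device_registry.hpp and differ
// in detail; every name below is a simplified stand-in.
#include <iostream>
#include <map>
#include <stdexcept>

enum class DeviceType { CPU, CUDA };  // stand-in for the tensor device type

template <typename F>
class DeviceRegistry {
 public:
  static DeviceRegistry& instance() {
    static DeviceRegistry registry;  // one registry per function-pointer type
    return registry;
  }
  void reg(DeviceType dev, F fn) { fns_[dev] = fn; }
  F find(DeviceType dev) const {
    auto it = fns_.find(dev);
    if (it == fns_.end())
      throw std::runtime_error("op not registered for this device");
    return it->second;
  }

 private:
  std::map<DeviceType, F> fns_;
};

// A toy op with the same shape as three_nn_forward_impl's dispatch.
using ThreeNNFn = void (*)(int b, int n, int m);

void three_nn_cpu(int b, int n, int m) {
  std::cout << "cpu three_nn(" << b << ", " << n << ", " << m << ")\n";
}

int main() {
  auto& registry = DeviceRegistry<ThreeNNFn>::instance();
  registry.reg(DeviceType::CPU, three_nn_cpu);    // ~ REGISTER_DEVICE_IMPL
  registry.find(DeviceType::CPU)(16, 1024, 256);  // ~ DISPATCH_DEVICE_IMPL
  // Dispatching to an unregistered device now fails with a runtime error
  // instead of being excluded at compile time behind #ifdef MMCV_WITH_CUDA.
  return 0;
}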
mmcv/ops/csrc/parrots/three_nn.cpp

@@ -2,29 +2,17 @@
 // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m,
-                                      const Tensor unknown, const Tensor known,
-                                      Tensor dist2, Tensor idx);
-
-void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
-                           const Tensor known, Tensor dist2, Tensor idx) {
-  ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
-};
-#endif
+void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
+                           const Tensor known, Tensor dist2, Tensor idx) {
+  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
+                       idx);
+}

 void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
                       Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
                       int m) {
-  if (unknown_tensor.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
-                          idx_tensor);
-#else
-    AT_ERROR("three_nn is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("three_nn is not implemented on CPU");
-  }
+  three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
+                        idx_tensor);
 }
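Note that after the rewrite nothing in three_nn.cpp references CUDA at all: a device-specific launcher has to be registered against three_nn_forward_impl from elsewhere in the build (mmcv's pytorch tree uses a REGISTER_DEVICE_IMPL macro for this, though this page of the diff does not show that side). Below is a self-contained analogue of the usual mechanism, registration through a static object's constructor; all names are hypothetical stand-ins and no real mmcv or CUDA API is used.

// Self-contained analogue of CUDA-side registration via a static object,
// the common mechanism behind REGISTER_DEVICE_IMPL-style macros.
#include <cassert>
#include <iostream>

using ThreeNNFn = void (*)(int b, int n, int m);
ThreeNNFn g_three_nn_cuda = nullptr;  // slot the registrar fills in

void three_nn_forward_cuda_stub(int b, int n, int m) {
  // A real build would call ThreeNNForwardCUDAKernelLauncher here.
  std::cout << "dispatching three_nn to CUDA: b=" << b << " n=" << n
            << " m=" << m << "\n";
}

// Runs before main(), so registration happens just by linking this file in.
struct ThreeNNCudaRegistrar {
  ThreeNNCudaRegistrar() { g_three_nn_cuda = three_nn_forward_cuda_stub; }
} g_three_nn_cuda_registrar;

int main() {
  assert(g_three_nn_cuda != nullptr);  // registered at static-init time
  g_three_nn_cuda(16, 1024, 256);
  return 0;
}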
mmcv/ops/csrc/parrots/tin_shift.cpp

 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
-                                       Tensor output);
-
-void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
-                                        Tensor grad_input);
-
-void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
-  TINShiftForwardCUDAKernelLauncher(input, shift, output);
+void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
+  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
 }

-void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
-                             Tensor grad_input) {
-  TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
+void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
+                             Tensor grad_input) {
+  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift,
+                       grad_input);
 }
-#endif

 void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
-  if (input.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(input);
-    CHECK_CUDA_INPUT(shift);
-    CHECK_CUDA_INPUT(output);
-    tin_shift_forward_cuda(input, shift, output);
-#else
-    AT_ERROR("TINShift is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("TINShift is not implemented on CPU");
-  }
+  tin_shift_forward_impl(input, shift, output);
 }

 void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
-  if (grad_output.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(grad_output);
-    CHECK_CUDA_INPUT(shift);
-    CHECK_CUDA_INPUT(grad_input);
-    tin_shift_backward_cuda(grad_output, shift, grad_input);
-#else
-    AT_ERROR("TINShift is not compiled with GPU support");
-#endif
-  } else {
-    AT_ERROR("TINShift is not implemented on CPU");
-  }
+  tin_shift_backward_impl(grad_output, shift, grad_input);
 }
mmcv/ops/csrc/parrots/upfirdn2d.cpp

 // Copyright (c) OpenMMLab. All rights reserved
-// from
+// Modified from
 // https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp
-#include "pytorch_cpp_helper.hpp"
-#ifdef MMCV_WITH_CUDA
-torch::Tensor upfirdn2d_op(const torch::Tensor &input,
-                           const torch::Tensor &kernel, int up_x, int up_y,
-                           int down_x, int down_y, int pad_x0, int pad_x1,
-                           int pad_y0, int pad_y1);
-#endif
+/*
+Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
+
+NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
+Augmentation (ADA)
+=======================================================================
+
+1. Definitions
+
+"Licensor" means any person or entity that distributes its Work.
+
+"Software" means the original work of authorship made available under
+this License.
+
+"Work" means the Software and any additions to or derivative works of
+the Software that are made available under this License.
+
+The terms "reproduce," "reproduction," "derivative works," and
+"distribution" have the meaning as provided under U.S. copyright law;
+provided, however, that for the purposes of this License, derivative
+works shall not include works that remain separable from, or merely
+link (or bind by name) to the interfaces of, the Work.
+
+Works, including the Software, are "made available" under this License
+by including in or with the Work either (a) a copyright notice
+referencing the applicability of this License to the Work, or (b) a
+copy of this License.
+
+2. License Grants
+
+2.1 Copyright Grant. Subject to the terms and conditions of this
+License, each Licensor grants to you a perpetual, worldwide,
+non-exclusive, royalty-free, copyright license to reproduce,
+prepare derivative works of, publicly display, publicly perform,
+sublicense and distribute its Work and any resulting derivative
+works in any form.
+
+3. Limitations
+
+3.1 Redistribution. You may reproduce or distribute the Work only
+if (a) you do so under this License, (b) you include a complete
+copy of this License with your distribution, and (c) you retain
+without modification any copyright, patent, trademark, or
+attribution notices that are present in the Work.
+
+3.2 Derivative Works. You may specify that additional or different
+terms apply to the use, reproduction, and distribution of your
+derivative works of the Work ("Your Terms") only if (a) Your Terms
+provide that the use limitation in Section 3.3 applies to your
+derivative works, and (b) you identify the specific derivative
+works that are subject to Your Terms. Notwithstanding Your Terms,
+this License (including the redistribution requirements in Section
+3.1) will continue to apply to the Work itself.
+
+3.3 Use Limitation. The Work and any derivative works thereof only
+may be used or intended for use non-commercially. Notwithstanding
+the foregoing, NVIDIA and its affiliates may use the Work and any
+derivative works commercially. As used herein, "non-commercially"
+means for research or evaluation purposes only.
+
+3.4 Patent Claims. If you bring or threaten to bring a patent claim
+against any Licensor (including any claim, cross-claim or
+counterclaim in a lawsuit) to enforce any patents that you allege
+are infringed by any Work, then your rights under this License from
+such Licensor (including the grant in Section 2.1) will terminate
+immediately.
+
+3.5 Trademarks. This License does not grant any rights to use any
+Licensor’s or its affiliates’ names, logos, or trademarks, except
+as necessary to reproduce the notices described in this License.
+
+3.6 Termination. If you violate any term of this License, then your
+rights under this License (including the grant in Section 2.1) will
+terminate immediately.
+
+4. Disclaimer of Warranty.
+
+THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
+NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
+THIS LICENSE.
+
+5. Limitation of Liability.
+
+EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
+THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
+SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
+INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
+OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
+(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
+LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
+COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGES.
+=======================================================================
+*/
+
+#include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"
+
+torch::Tensor upfirdn2d_op_impl(const torch::Tensor &input,
+                                const torch::Tensor &kernel, int up_x,
+                                int up_y, int down_x, int down_y, int pad_x0,
+                                int pad_x1, int pad_y0, int pad_y1) {
+  return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
+                              down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
+}

 torch::Tensor upfirdn2d(const torch::Tensor &input,
                         const torch::Tensor &kernel, int up_x, int up_y,
                         int down_x, int down_y, int pad_x0, int pad_x1,
                         int pad_y0, int pad_y1) {
-#ifdef MMCV_WITH_CUDA
-  CHECK_CUDA(input);
-  CHECK_CUDA(kernel);
-  return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
-                      pad_x1, pad_y0, pad_y1);
-#else
-  AT_ERROR("UpFirDn2d is not compiled with GPU support");
-#endif
+  return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
+                           pad_x1, pad_y0, pad_y1);
 }
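Unlike the void ops above, upfirdn2d_op_impl forwards a return value straight through the dispatch (return DISPATCH_DEVICE_IMPL(...)). A minimal sketch of that shape, with hypothetical names and plain ints standing in for tensors:

// Sketch of value-returning dispatch, mirroring
// `return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, ...)`. Illustration only.
#include <functional>
#include <iostream>
#include <map>
#include <string>

using UpFirDnFn = std::function<int(int up, int down)>;

std::map<std::string, UpFirDnFn>& upfirdn_registry() {
  static std::map<std::string, UpFirDnFn> r;
  return r;
}

int upfirdn2d_op_impl(const std::string& device, int up, int down) {
  // The dispatch returns whatever the registered implementation returns.
  return upfirdn_registry().at(device)(up, down);
}

int main() {
  upfirdn_registry()["cuda"] = [](int up, int down) { return up - down; };
  std::cout << upfirdn2d_op_impl("cuda", 2, 1) << "\n";  // prints 1
  return 0;
}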
mmcv/ops/csrc/parrots/voxelization.cpp

 // Copyright (c) OpenMMLab. All rights reserved.
 #include "pytorch_cpp_helper.hpp"
+#include "pytorch_device_registry.hpp"

-#ifdef MMCV_WITH_CUDA
-int HardVoxelizeForwardCUDAKernelLauncher(
-    const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
-    at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
-    const std::vector<float> coors_range, const int max_points,
-    const int max_voxels, const int NDim = 3);
-
-int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels,
-                               at::Tensor &coors,
-                               at::Tensor &num_points_per_voxel,
-                               const std::vector<float> voxel_size,
-                               const std::vector<float> coors_range,
-                               const int max_points, const int max_voxels,
-                               const int NDim = 3) {
-  return HardVoxelizeForwardCUDAKernelLauncher(
-      points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
-      max_points, max_voxels, NDim);
-};
-
-void DynamicVoxelizeForwardCUDAKernelLauncher(
-    const at::Tensor &points, at::Tensor &coors,
-    const std::vector<float> voxel_size, const std::vector<float> coors_range,
-    const int NDim = 3);
-
-void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors,
-                                   const std::vector<float> voxel_size,
-                                   const std::vector<float> coors_range,
-                                   const int NDim = 3) {
-  DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
-                                           coors_range, NDim);
-};
-#endif
-
-int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
-                              at::Tensor &coors,
-                              at::Tensor &num_points_per_voxel,
-                              const std::vector<float> voxel_size,
-                              const std::vector<float> coors_range,
-                              const int max_points, const int max_voxels,
-                              const int NDim = 3);
-
-void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
-                                  const std::vector<float> voxel_size,
-                                  const std::vector<float> coors_range,
-                                  const int NDim = 3);
+int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
+                               at::Tensor &coors,
+                               at::Tensor &num_points_per_voxel,
+                               const std::vector<float> voxel_size,
+                               const std::vector<float> coors_range,
+                               const int max_points, const int max_voxels,
+                               const int NDim = 3) {
+  return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels,
+                              coors, num_points_per_voxel, voxel_size,
+                              coors_range, max_points, max_voxels, NDim);
+}
+
+void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
+                                   const std::vector<float> voxel_size,
+                                   const std::vector<float> coors_range,
+                                   const int NDim = 3) {
+  DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors,
+                       voxel_size, coors_range, NDim);
+}

@@ -60,21 +35,10 @@ void hard_voxelize_forward(const at::Tensor &points,
   std::vector<float> coors_range_v(
       coors_range.data_ptr<float>(),
       coors_range.data_ptr<float>() + coors_range.numel());
-  if (points.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(points);
-    *voxel_num_data = hard_voxelize_forward_cuda(
-        points, voxels, coors, num_points_per_voxel, voxel_size_v,
-        coors_range_v, max_points, max_voxels, NDim);
-#else
-    AT_ERROR("hard_voxelize is not compiled with GPU support");
-#endif
-  } else {
-    *voxel_num_data = hard_voxelize_forward_cpu(
-        points, voxels, coors, num_points_per_voxel, voxel_size_v,
-        coors_range_v, max_points, max_voxels, NDim);
-  }
+  *voxel_num_data = hard_voxelize_forward_impl(
+      points, voxels, coors, num_points_per_voxel, voxel_size_v,
+      coors_range_v, max_points, max_voxels, NDim);
 }

@@ -87,17 +51,6 @@ void dynamic_voxelize_forward(const at::Tensor &points,
   std::vector<float> coors_range_v(
       coors_range.data_ptr<float>(),
       coors_range.data_ptr<float>() + coors_range.numel());
-  if (points.device().is_cuda()) {
-#ifdef MMCV_WITH_CUDA
-    CHECK_CUDA_INPUT(points);
-    dynamic_voxelize_forward_cuda(points, coors, voxel_size_v, coors_range_v,
-                                  NDim);
-#else
-    AT_ERROR("dynamic_voxelize is not compiled with GPU support");
-#endif
-  } else {
-    dynamic_voxelize_forward_cpu(points, coors, voxel_size_v, coors_range_v,
-                                 NDim);
-  }
+  dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
+                                NDim);
 }
mmcv/ops/csrc/parrots/voxelization_cpu.cpp (deleted, 100644 → 0)

// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"

template <typename T, typename T_int>
void dynamic_voxelize_forward_cpu_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T_int, 2> coors,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const std::vector<int> grid_size, const int num_points,
    const int num_features, const int NDim) {
  const int ndim_minus_1 = NDim - 1;
  bool failed = false;
  // int coor[NDim];
  int *coor = new int[NDim]();
  int c;

  for (int i = 0; i < num_points; ++i) {
    failed = false;
    for (int j = 0; j < NDim; ++j) {
      c = floor((points[i][j] - coors_range[j]) / voxel_size[j]);
      // necessary to rm points out of range
      if ((c < 0 || c >= grid_size[j])) {
        failed = true;
        break;
      }
      coor[ndim_minus_1 - j] = c;
    }

    if (failed)
      memset(&coors[i][0], -1, NDim * sizeof(T_int));
    else
      memcpy(&coors[i][0], &coor[0], NDim * sizeof(T_int));
  }

  delete[] coor;
}

template <typename T, typename T_int>
void hard_voxelize_forward_cpu_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T, 3> voxels, torch::TensorAccessor<T_int, 2> coors,
    torch::TensorAccessor<T_int, 1> num_points_per_voxel,
    torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int &voxel_num,
    const std::vector<float> voxel_size, const std::vector<float> coors_range,
    const std::vector<int> grid_size, const int max_points,
    const int max_voxels, const int num_points, const int num_features,
    const int NDim) {
  // declare a temp coors
  at::Tensor temp_coors = at::zeros(
      {num_points, NDim},
      at::TensorOptions().dtype(at::kInt).device(at::kCPU));

  // First use dynamic voxelization to get coors,
  // then check max points/voxels constraints
  dynamic_voxelize_forward_cpu_kernel<T, int>(
      points, temp_coors.accessor<int, 2>(), voxel_size, coors_range,
      grid_size, num_points, num_features, NDim);

  int voxelidx, num;
  auto coor = temp_coors.accessor<int, 2>();

  for (int i = 0; i < num_points; ++i) {
    // T_int* coor = temp_coors.data_ptr<int>() + i * NDim;

    if (coor[i][0] == -1) continue;

    voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];

    // record voxel
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (max_voxels != -1 && voxel_num >= max_voxels) continue;
      voxel_num += 1;

      coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
      memcpy(&coors[voxelidx][0], &coor[i][0], NDim * sizeof(T_int));
    }

    // put points into voxel
    num = num_points_per_voxel[voxelidx];
    if (max_points == -1 || num < max_points) {
      memcpy(&voxels[voxelidx][num][0], &points[i][0],
             num_features * sizeof(T));
      num_points_per_voxel[voxelidx] += 1;
    }
  }

  return;
}

void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
                                  const std::vector<float> voxel_size,
                                  const std::vector<float> coors_range,
                                  const int NDim = 3) {
  // check device
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  std::vector<int> grid_size(NDim);
  const int num_points = points.size(0);
  const int num_features = points.size(1);

  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }

  // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] {
        dynamic_voxelize_forward_cpu_kernel<scalar_t, int>(
            points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),
            voxel_size, coors_range, grid_size, num_points, num_features,
            NDim);
      });
}

int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
                              at::Tensor &coors,
                              at::Tensor &num_points_per_voxel,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const int max_points, const int max_voxels,
                              const int NDim = 3) {
  // current version tooks about 0.02s_0.03s for one frame on cpu
  // check device
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  std::vector<int> grid_size(NDim);
  const int num_points = points.size(0);
  const int num_features = points.size(1);

  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }

  // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
  // printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2],
  // grid_size[1], grid_size[0]);
  at::Tensor coor_to_voxelidx =
      -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());

  int voxel_num = 0;
  AT_DISPATCH_FLOATING_TYPES_AND_HALF(
      points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] {
        hard_voxelize_forward_cpu_kernel<scalar_t, int>(
            points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),
            coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),
            coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,
            coors_range, grid_size, max_points, max_voxels, num_points,
            num_features, NDim);
      });

  return voxel_num;
}
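With this deletion the parrots tree no longer carries its own CPU voxelization; presumably the registered CPU implementation now comes from the shared sources that setup.py below newly globs in. For reference, the heart of the deleted dynamic_voxelize_forward_cpu_kernel is its binning loop, distilled here into a standalone program; the sample points and grid are made up for illustration.

// Standalone distillation of the deleted dynamic_voxelize_forward_cpu_kernel
// binning loop: each point maps to an integer grid cell of size voxel_size
// inside coors_range; out-of-range points get (-1, -1, -1).
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int NDim = 3;
  const std::vector<float> voxel_size = {0.5f, 0.5f, 0.5f};
  const std::vector<float> coors_range = {0, 0, 0, 10, 10, 4};  // mins, maxes
  const std::vector<std::vector<float>> points = {
      {1.2f, 3.7f, 0.9f}, {11.0f, 2.0f, 1.0f}};  // second point is out of range

  // grid_size[i] = extent along axis i divided by the voxel edge length
  std::vector<int> grid_size(NDim);
  for (int i = 0; i < NDim; ++i)
    grid_size[i] = static_cast<int>(
        std::round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]));

  for (const auto &p : points) {
    int coor[3] = {0, 0, 0};
    bool failed = false;
    for (int j = 0; j < NDim; ++j) {
      int c = static_cast<int>(
          std::floor((p[j] - coors_range[j]) / voxel_size[j]));
      if (c < 0 || c >= grid_size[j]) {  // drop points outside the grid
        failed = true;
        break;
      }
      coor[NDim - 1 - j] = c;  // stored reversed, i.e. as (z, y, x)
    }
    if (failed) coor[0] = coor[1] = coor[2] = -1;
    std::printf("(%d, %d, %d)\n", coor[0], coor[1], coor[2]);
  }
  return 0;
}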
setup.py

@@ -189,13 +189,14 @@ def get_extensions():
         define_macros = []
         include_dirs = []
         op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \
             glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
             glob.glob('./mmcv/ops/csrc/parrots/*.cpp')
         include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
         include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda'))
         cuda_args = os.getenv('MMCV_CUDA_ARGS')
         extra_compile_args = {
-            'nvcc': [cuda_args] if cuda_args else [],
-            'cxx': [],
+            'nvcc': [cuda_args, '-std=c++14']
+            if cuda_args else ['-std=c++14'],
+            'cxx': ['-std=c++14'],
         }
         if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
             define_macros += [('MMCV_WITH_CUDA', None)]