OpenDAS / MMCV / Commits / 6f3c5f1c

Commit 6f3c5f1c, authored Jul 11, 2024 by limm
support v1.4.0
parent 6f674c7e
Changes: 339 files in total. This page shows 20 changed files with 1545 additions and 119 deletions (+1545, -119).
Files changed on this page:

mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp               +81   -0
mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp                         +188  -0
mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp                          +265  -0
mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp                  +247  -0
mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp                           +156  -0
mmcv/ops/csrc/onnxruntime/deform_conv.h                              +57   -0
mmcv/ops/csrc/onnxruntime/grid_sample.h                              +44   -0
mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h                    +61   -0
mmcv/ops/csrc/onnxruntime/nms.h                                      +45   -0
mmcv/ops/csrc/onnxruntime/onnxruntime_register.h                     +16   -0
mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h  +44   -0
mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h                           +15   -0
mmcv/ops/csrc/onnxruntime/reduce_ops.h                               +95   -0
mmcv/ops/csrc/onnxruntime/roi_align.h                                +62   -0
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h                        +62   -0
mmcv/ops/csrc/onnxruntime/soft_nms.h                                 +49   -0
mmcv/ops/csrc/parrots/active_rotated_filter.cpp                      +0    -28
mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp              +0    -63
mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h                +0    -13
mmcv/ops/csrc/parrots/assign_score_withk.cpp                         +58   -15
mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#include "onnxruntime_register.h"

#include "corner_pool.h"
#include "deform_conv.h"
#include "grid_sample.h"
#include "modulated_deform_conv.h"
#include "nms.h"
#include "ort_mmcv_utils.h"
#include "reduce_ops.h"
#include "roi_align.h"
#include "roi_align_rotated.h"
#include "soft_nms.h"

const char *c_MMCVOpDomain = "mmcv";
SoftNmsOp c_SoftNmsOp;
NmsOp c_NmsOp;
MMCVRoiAlignCustomOp c_MMCVRoiAlignCustomOp;
MMCVRoIAlignRotatedCustomOp c_MMCVRoIAlignRotatedCustomOp;
GridSampleOp c_GridSampleOp;
MMCVCumMaxCustomOp c_MMCVCumMaxCustomOp;
MMCVCumMinCustomOp c_MMCVCumMinCustomOp;
MMCVCornerPoolCustomOp c_MMCVCornerPoolCustomOp;
MMCVModulatedDeformConvOp c_MMCVModulatedDeformConvOp;
MMCVDeformConvOp c_MMCVDeformConvOp;

OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
                                          const OrtApiBase *api) {
  OrtCustomOpDomain *domain = nullptr;
  const OrtApi *ortApi = api->GetApi(ORT_API_VERSION);

  if (auto status = ortApi->CreateCustomOpDomain(c_MMCVOpDomain, &domain)) {
    return status;
  }

  if (auto status = ortApi->CustomOpDomain_Add(domain, &c_SoftNmsOp)) {
    return status;
  }

  if (auto status = ortApi->CustomOpDomain_Add(domain, &c_NmsOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVRoiAlignCustomOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVRoIAlignRotatedCustomOp)) {
    return status;
  }

  if (auto status = ortApi->CustomOpDomain_Add(domain, &c_GridSampleOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVCornerPoolCustomOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVCumMaxCustomOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVCumMinCustomOp)) {
    return status;
  }

  if (auto status =
          ortApi->CustomOpDomain_Add(domain, &c_MMCVModulatedDeformConvOp)) {
    return status;
  }

  if (auto status = ortApi->CustomOpDomain_Add(domain, &c_MMCVDeformConvOp)) {
    return status;
  }

  return ortApi->AddCustomOpDomain(options, domain);
}
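RegisterCustomOps above is the C entry point a client calls to make the "mmcv" op domain visible to a session. A minimal usage sketch, assuming the ONNX Runtime C++ API and a hypothetical model file model.onnx containing mmcv-domain nodes (Ort::SessionOptions converts implicitly to OrtSessionOptions*):

#include <onnxruntime_cxx_api.h>

#include "onnxruntime_register.h"

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "mmcv-demo");
  Ort::SessionOptions session_options;
  // Register the "mmcv" custom-op domain before creating the session, so that
  // nodes such as mmcv::SoftNonMaxSuppression or mmcv::MMCVRoiAlign resolve.
  Ort::ThrowOnError(RegisterCustomOps(session_options, OrtGetApiBase()));
  Ort::Session session(env, "model.onnx", session_options);  // hypothetical model
  return 0;
}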
mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#include "reduce_ops.h"

#include <assert.h>

#include <vector>

#include "../ort_mmcv_utils.h"

// modified from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/ReduceOps.cpp
static inline int64_t maybe_wrap_dim(int64_t dim, int64_t ndims) {
  int64_t min = -ndims;
  int64_t max = ndims - 1;
  assert(dim >= min && dim <= max);
  if (dim < 0) dim += ndims;
  return dim;
}

static inline int64_t get_dim_stride(const int64_t dim, const int64_t ndims,
                                     const int64_t *reversed_dim_cumprod) {
  return dim == ndims - 1 ? 1 : reversed_dim_cumprod[dim + 1];
}

static inline int64_t get_dim_size(const int64_t dim, const int64_t ndims,
                                   const int64_t *reversed_dim_cumprod) {
  return dim == ndims - 1
             ? reversed_dim_cumprod[dim]
             : reversed_dim_cumprod[dim] / reversed_dim_cumprod[dim + 1];
}

template <typename T1, typename T2, typename Operation>
void cummax_cummin_helper(const T1 *input, T1 *output, T2 *indices,
                          const int64_t input_dim_size, const int64_t stride) {
  Operation op;
  T1 out = input[0];
  int64_t idx = 0;
  for (int64_t i = 0; i < input_dim_size; i++) {
    T1 curr_elem = input[i * stride];
    if (op(curr_elem, out)) {
      out = curr_elem;
      idx = i;
    }
    output[i * stride] = out;
    indices[i * stride] = idx;
  }
}

// modified `tensor_dim_apply3` from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/TensorDimApply.h.
// the difference is that: (1) use `reversed_dim_cumprod` for fast computing of
// tensor `size` and `stride`. (2) the same `stride` is used for input, output,
// and indices, since it's unnecessary to use separate values. currently
// `tensor_dim_apply3` is only used for `cummax` and `cummin`, according to the
// official pytorch projects: https://github.com/pytorch/pytorch.
template <typename T1, typename T2, typename Function>
void tensor_dim_apply3(const T1 *input, T1 *output, T2 *indices,
                       const int64_t dim, const int64_t ndims,
                       const int64_t *reversed_dim_cumprod, Function func) {
  int dim_apply_finished = 0;
  int64_t input_dim_size = get_dim_size(dim, ndims, reversed_dim_cumprod);
  // the same stride is used for input, output and indices
  int64_t stride = get_dim_stride(dim, ndims, reversed_dim_cumprod);
  std::vector<int64_t> counter(ndims, 0);

  while (!dim_apply_finished) {
    // call `func` once to update output and indices
    func(input, output, indices, input_dim_size, stride);

    if (ndims == 1) break;

    // advance the counters over every dimension except `dim`
    for (int64_t dim_i = 0; dim_i < ndims; dim_i++) {
      if (dim_i == dim) {
        if (dim_i == (ndims - 1)) {
          dim_apply_finished = 1;
          break;
        }
        continue;
      }
      counter[dim_i]++;

      // the same stride is used for input, output, and indices
      int64_t stride_dim_i = get_dim_stride(dim_i, ndims, reversed_dim_cumprod);
      input += stride_dim_i;
      output += stride_dim_i;
      indices += stride_dim_i;

      if (counter[dim_i] == get_dim_size(dim_i, ndims, reversed_dim_cumprod)) {
        if (dim_i == ndims - 1) {
          dim_apply_finished = 1;
          break;
        } else {
          input -= counter[dim_i] * stride_dim_i;
          output -= counter[dim_i] * stride_dim_i;
          indices -= counter[dim_i] * stride_dim_i;
          counter[dim_i] = 0;
        }
      } else {
        break;
      }  // if
    }    // for
  }      // while
}

template <typename T1, typename T2, typename Operation>
void CumMax_CumMin_CPU(const T1 *input, T1 *output, T2 *indices,
                       int64_t *reversed_dim_cumprod, const int64_t dim,
                       const OrtTensorDimensions &out_dimensions) {
  // calculate numel
  const int64_t ndims = out_dimensions.size();
  int64_t numel = 1;
  for (int64_t dim_i = 0; dim_i < ndims; dim_i++) {
    numel *= out_dimensions.data()[dim_i];
  }

  // cummax is only applied to input which is non-zero dim and non-empty
  if (numel) {
    // compute the cumulative production on dimension size,
    // which is then used for computing the stride or size of a specific `dim`.
    reversed_dim_cumprod[ndims - 1] = out_dimensions.data()[ndims - 1];
    for (int64_t dim_i = ndims - 2; dim_i >= 0; dim_i--) {
      reversed_dim_cumprod[dim_i] =
          reversed_dim_cumprod[dim_i + 1] * out_dimensions.data()[dim_i];
    }

    // do cummax or cummin based on `Operation` type
    tensor_dim_apply3<float, int64_t>(
        input, output, indices, dim, ndims, reversed_dim_cumprod,
        cummax_cummin_helper<float, int64_t, Operation>);
  }
}

void MMCVCumMaxKernel::Compute(OrtKernelContext *context) {
  // get input
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  // get output
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *output_data = ort_.GetTensorMutableData<float>(output);
  OrtValue *indices = ort_.KernelContext_GetOutput(
      context, 1, out_dimensions.data(), out_dimensions.size());
  int64_t *indices_data = ort_.GetTensorMutableData<int64_t>(indices);

  // allocate tmp memory for computing the cumulative production on dimension
  // size
  const int64_t ndims = out_dimensions.size();
  assert(ndims > 0);
  int64_t *reversed_dim_cumprod =
      (int64_t *)allocator_.Alloc(sizeof(int64_t) * ndims);

  // dim should be wrapped if it's negative (e.g. -1)
  const int64_t dim = maybe_wrap_dim(dim_, ndims);
  CumMax_CumMin_CPU<float, int64_t, std::greater_equal<float>>(
      input_data, output_data, indices_data, reversed_dim_cumprod, dim,
      out_dimensions);
}

void MMCVCumMinKernel::Compute(OrtKernelContext *context) {
  // get input
  const OrtValue *input = ort_.KernelContext_GetInput(context, 0);
  const float *input_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input));

  // get output
  OrtTensorDimensions out_dimensions(ort_, input);
  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *output_data = ort_.GetTensorMutableData<float>(output);
  OrtValue *indices = ort_.KernelContext_GetOutput(
      context, 1, out_dimensions.data(), out_dimensions.size());
  int64_t *indices_data = ort_.GetTensorMutableData<int64_t>(indices);

  // allocate tmp memory for computing the cumulative production on dimension
  // size
  const int64_t ndims = out_dimensions.size();
  assert(ndims > 0);
  int64_t *reversed_dim_cumprod =
      (int64_t *)allocator_.Alloc(sizeof(int64_t) * ndims);

  // dim should be wrapped if it's negative (e.g. -1)
  const int64_t dim = maybe_wrap_dim(dim_, ndims);
  CumMax_CumMin_CPU<float, int64_t, std::less_equal<float>>(
      input_data, output_data, indices_data, reversed_dim_cumprod, dim,
      out_dimensions);
}
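The reversed cumulative product is what lets get_dim_stride and get_dim_size answer in O(1): for a contiguous tensor, reversed_dim_cumprod[d] counts the elements spanned by dimensions d..ndims-1. A small standalone check, using a hypothetical shape {2, 3, 4}:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t ndims = 3;
  const int64_t shape[3] = {2, 3, 4};
  int64_t reversed_dim_cumprod[3];
  reversed_dim_cumprod[ndims - 1] = shape[ndims - 1];  // {_, _, 4}
  for (int64_t i = ndims - 2; i >= 0; i--)
    reversed_dim_cumprod[i] = reversed_dim_cumprod[i + 1] * shape[i];  // {24, 12, 4}
  // stride of dim 1 in a contiguous layout is the product of trailing dims:
  assert(reversed_dim_cumprod[2] == 4);  // get_dim_stride(1, 3, ...)
  // size of dim 1 is recovered by dividing adjacent cumprod entries:
  assert(reversed_dim_cumprod[1] / reversed_dim_cumprod[2] == 3);
  return 0;
}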
mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#include "roi_align.h"

#include "../ort_mmcv_utils.h"

// implementation taken from Caffe2
struct PreCalc {
  int pos1;
  int pos2;
  int pos3;
  int pos4;
  float w1;
  float w2;
  float w3;
  float w4;
};

void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, std::vector<PreCalc> &pre_calc) {
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        const float yy =
            roi_start_h + ph * bin_size_h +
            static_cast<float>(iy + .5f) * bin_size_h /
                static_cast<float>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          const float xx = roi_start_w + pw * bin_size_w +
                           static_cast<float>(ix + .5f) * bin_size_w /
                               static_cast<float>(roi_bin_grid_w);

          float x = xx;
          float y = yy;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty
            PreCalc pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          if (y <= 0) {
            y = 0;
          }
          if (x <= 0) {
            x = 0;
          }

          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (float)y_low;
          } else {
            y_high = y_low + 1;
          }

          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (float)x_low;
          } else {
            x_high = x_low + 1;
          }

          float ly = y - y_low;
          float lx = x - x_low;
          float hy = 1. - ly, hx = 1. - lx;
          float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

void ROIAlignForwardCPU(const int nthreads, const float *input,
                        const float *rois, float *output, float *argmax_y,
                        float *argmax_x, const int pooled_height,
                        const int pooled_width, const float spatial_scale,
                        const int sampling_ratio,
                        const int pool_mode,  // 0 - max pool, 1 - avg pool
                        const bool aligned, const int channels,
                        const int height, const int width) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    int index_n = n * channels * pooled_width * pooled_height;

    const float *offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];

    // Do not use rounding; this implementation detail is critical
    float offset = aligned ? (float)0.5 : (float)0.0;
    float roi_start_w = offset_rois[1] * spatial_scale - offset;
    float roi_start_h = offset_rois[2] * spatial_scale - offset;
    float roi_end_w = offset_rois[3] * spatial_scale - offset;
    float roi_end_h = offset_rois[4] * spatial_scale - offset;

    float roi_width = roi_end_w - roi_start_w;
    float roi_height = roi_end_h - roi_start_h;
    if (aligned) {
      /*AT_ASSERTM(roi_width >= 0 && roi_height >= 0,
                 "ROIs in ROIAlign cannot have non-negative size!");*/
      assert(roi_width >= 0 && roi_height >= 0);
    } else {
      // for backward-compatibility only
      roi_width = std::max(roi_width, (float)1.);
      roi_height = std::max(roi_height, (float)1.);
    }

    float bin_size_h =
        static_cast<float>(roi_height) / static_cast<float>(pooled_height);
    float bin_size_w =
        static_cast<float>(roi_width) / static_cast<float>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceil(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);

    // When the grid is empty, output zeros == 0/1, instead of NaN.
    const float count =
        std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                  pooled_width * pooled_height);
    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      const float *offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          float output_val = 0.;
          float maxval = -10000;
          float maxidx_y = -1.f, maxidx_x = -1.f;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            const float y = roi_start_h + ph * bin_size_h +
                            static_cast<float>(iy + .5f) * bin_size_h /
                                static_cast<float>(roi_bin_grid_h);
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              const float x = roi_start_w + pw * bin_size_w +
                              static_cast<float>(ix + .5f) * bin_size_w /
                                  static_cast<float>(roi_bin_grid_w);
              PreCalc pc = pre_calc[pre_calc_index];
              float val = pc.w1 * offset_input[pc.pos1] +
                          pc.w2 * offset_input[pc.pos2] +
                          pc.w3 * offset_input[pc.pos3] +
                          pc.w4 * offset_input[pc.pos4];
              if (val > maxval) {
                maxval = val;
                maxidx_y = y;
                maxidx_x = x;
              }
              output_val += val;
              pre_calc_index += 1;
            }
          }
          if (pool_mode == 0) {
            // We do max pooling inside a bin
            output[index] = maxval;
            argmax_y[index] = maxidx_y;
            argmax_x[index] = maxidx_x;
          } else if (pool_mode == 1) {
            // We do average (integral) pooling inside a bin
            output[index] = output_val / count;
          }  // if
        }    // for pw
      }      // for ph
    }        // for c
  }          // for n
}

void MMCVRoiAlignKernel::Compute(OrtKernelContext *context) {
  // Setup inputs
  const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0);
  const float *X_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input_X));
  const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1);
  const float *rois = reinterpret_cast<const float *>(
      ort_.GetTensorData<const float *>(input_rois));

  // Setup output
  OrtTensorDimensions out_dimensions(ort_, input_X);
  OrtTensorDimensions roi_dimensions(ort_, input_rois);

  int batch_size = out_dimensions.data()[0];
  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];

  out_dimensions.data()[0] = roi_dimensions.data()[0];
  out_dimensions.data()[2] = aligned_height_;
  out_dimensions.data()[3] = aligned_width_;

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *out = ort_.GetTensorMutableData<float>(output);
  OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output);
  ort_.ReleaseTensorTypeAndShapeInfo(output_info);

  // TODO: forward here
  int output_size = out_dimensions.data()[0];
  for (auto i = 1; i < out_dimensions.size(); ++i) {
    output_size *= out_dimensions.data()[i];
  }

  int poolMod = 1;
  if (pool_mode_ == "max") poolMod = 0;

  float *argmax_x = nullptr, *argmax_y = nullptr;
  if (poolMod == 0) {
    argmax_y = new float[output_size];
    argmax_x = new float[output_size];
  }

  ROIAlignForwardCPU(output_size, X_data, rois, out, argmax_y, argmax_x,
                     aligned_height_, aligned_width_, spatial_scale_,
                     sampling_ratio_, poolMod, aligned_, input_channels,
                     input_height, input_width);
  // the argmax buffers were allocated with new[], so release them with delete[]
  if (argmax_x) delete[] argmax_x;
  if (argmax_y) delete[] argmax_y;
}
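The four PreCalc weights are ordinary bilinear-interpolation coefficients; for any in-bounds sample point they sum to 1. A standalone worked example with a hypothetical sample point (y, x) = (1.3, 2.7) on an 8-pixel-wide feature map:

#include <cassert>
#include <cmath>

int main() {
  const int width = 8;  // feature-map width, used only for flat indexing
  float y = 1.3f, x = 2.7f;
  int y_low = (int)y, x_low = (int)x;          // (1, 2)
  int y_high = y_low + 1, x_high = x_low + 1;  // (2, 3)
  float ly = y - y_low, lx = x - x_low;        // (0.3, 0.7)
  float hy = 1.f - ly, hx = 1.f - lx;          // (0.7, 0.3)
  float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
  // weights: 0.21, 0.49, 0.09, 0.21 -- a convex combination of four corners
  assert(std::fabs(w1 + w2 + w3 + w4 - 1.f) < 1e-6f);
  int pos1 = y_low * width + x_low;    // flat offsets, as cached in PreCalc
  int pos4 = y_high * width + x_high;
  (void)pos1;
  (void)pos4;
  return 0;
}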
mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp (new file, mode 100644)

// Modified from
// https://github.com/facebookresearch/detectron2/tree/master/detectron2/layers/csrc/ROIAlignRotated
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
#include "roi_align_rotated.h"

#include "../ort_mmcv_utils.h"

struct PreCalc {
  int pos1;
  int pos2;
  int pos3;
  int pos4;
  float w1;
  float w2;
  float w3;
  float w4;
};

void pre_calc_for_bilinear_interpolate(
    const int height, const int width, const int pooled_height,
    const int pooled_width, const int iy_upper, const int ix_upper,
    float roi_start_h, float roi_start_w, float bin_size_h, float bin_size_w,
    int roi_bin_grid_h, int roi_bin_grid_w, float roi_center_h,
    float roi_center_w, float cos_theta, float sin_theta,
    std::vector<PreCalc> &pre_calc) {
  int pre_calc_index = 0;
  for (int ph = 0; ph < pooled_height; ph++) {
    for (int pw = 0; pw < pooled_width; pw++) {
      for (int iy = 0; iy < iy_upper; iy++) {
        const float yy =
            roi_start_h + ph * bin_size_h +
            static_cast<float>(iy + .5f) * bin_size_h /
                static_cast<float>(roi_bin_grid_h);  // e.g., 0.5, 1.5
        for (int ix = 0; ix < ix_upper; ix++) {
          const float xx = roi_start_w + pw * bin_size_w +
                           static_cast<float>(ix + .5f) * bin_size_w /
                               static_cast<float>(roi_bin_grid_w);

          // Rotate by theta around the center and translate
          // In image space, (y, x) is the order for Right Handed System,
          // and this is essentially multiplying the point by a rotation matrix
          // to rotate it counterclockwise through angle theta.
          float y = yy * cos_theta - xx * sin_theta + roi_center_h;
          float x = yy * sin_theta + xx * cos_theta + roi_center_w;
          // deal with: inverse elements are out of feature map boundary
          if (y < -1.0 || y > height || x < -1.0 || x > width) {
            // empty
            PreCalc pc;
            pc.pos1 = 0;
            pc.pos2 = 0;
            pc.pos3 = 0;
            pc.pos4 = 0;
            pc.w1 = 0;
            pc.w2 = 0;
            pc.w3 = 0;
            pc.w4 = 0;
            pre_calc[pre_calc_index] = pc;
            pre_calc_index += 1;
            continue;
          }

          if (y < 0) {
            y = 0;
          }
          if (x < 0) {
            x = 0;
          }

          int y_low = (int)y;
          int x_low = (int)x;
          int y_high;
          int x_high;

          if (y_low >= height - 1) {
            y_high = y_low = height - 1;
            y = (float)y_low;
          } else {
            y_high = y_low + 1;
          }

          if (x_low >= width - 1) {
            x_high = x_low = width - 1;
            x = (float)x_low;
          } else {
            x_high = x_low + 1;
          }

          float ly = y - y_low;
          float lx = x - x_low;
          float hy = 1. - ly, hx = 1. - lx;
          float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

          // save weights and indices
          PreCalc pc;
          pc.pos1 = y_low * width + x_low;
          pc.pos2 = y_low * width + x_high;
          pc.pos3 = y_high * width + x_low;
          pc.pos4 = y_high * width + x_high;
          pc.w1 = w1;
          pc.w2 = w2;
          pc.w3 = w3;
          pc.w4 = w4;
          pre_calc[pre_calc_index] = pc;

          pre_calc_index += 1;
        }
      }
    }
  }
}

void ROIAlignRotatedForwardCPU(const int nthreads, const float *input,
                               const float *rois, float *output,
                               const float &spatial_scale, const int aligned,
                               const int clockwise, const int channels,
                               const int height, const int width,
                               const int pooled_height, const int pooled_width,
                               const int sampling_ratio) {
  int n_rois = nthreads / channels / pooled_width / pooled_height;
  // (n, c, ph, pw) is an element in the pooled output
  // can be parallelized using omp
  // #pragma omp parallel for num_threads(32)
  for (int n = 0; n < n_rois; n++) {
    int index_n = n * channels * pooled_width * pooled_height;

    const float *current_roi = rois + n * 6;
    int roi_batch_ind = current_roi[0];

    // Do not use rounding; this implementation detail is critical
    float offset = aligned ? (float)0.5 : (float)0.0;
    float roi_center_w = current_roi[1] * spatial_scale - offset;
    float roi_center_h = current_roi[2] * spatial_scale - offset;
    float roi_width = current_roi[3] * spatial_scale;
    float roi_height = current_roi[4] * spatial_scale;
    // float theta = current_roi[5] * M_PI / 180.0;
    float theta = current_roi[5];  // Radian angle by default
    if (clockwise) {
      theta = -theta;
    }
    float cos_theta = cos(theta);
    float sin_theta = sin(theta);

    if (!aligned) {
      // for backward-compatibility only
      roi_width = std::max(roi_width, (float)1.);
      roi_height = std::max(roi_height, (float)1.);
    }

    float bin_size_h =
        static_cast<float>(roi_height) / static_cast<float>(pooled_height);
    float bin_size_w =
        static_cast<float>(roi_width) / static_cast<float>(pooled_width);

    // We use roi_bin_grid to sample the grid and mimic integral
    int roi_bin_grid_h = (sampling_ratio > 0)
                             ? sampling_ratio
                             : ceil(roi_height / pooled_height);  // e.g., = 2
    int roi_bin_grid_w =
        (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);

    // We do average (integral) pooling inside a bin
    const float count =
        std::max(roi_bin_grid_h * roi_bin_grid_w, 1);  // e.g. = 4

    // we want to precalculate indices and weights shared by all channels,
    // this is the key point of optimization
    std::vector<PreCalc> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
                                  pooled_width * pooled_height);

    // roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
    // Appropriate translation needs to be applied after.
    float roi_start_h = -roi_height / 2.0;
    float roi_start_w = -roi_width / 2.0;

    pre_calc_for_bilinear_interpolate(
        height, width, pooled_height, pooled_width, roi_bin_grid_h,
        roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
        roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta,
        sin_theta, pre_calc);

    for (int c = 0; c < channels; c++) {
      int index_n_c = index_n + c * pooled_width * pooled_height;
      const float *offset_input =
          input + (roi_batch_ind * channels + c) * height * width;
      int pre_calc_index = 0;

      for (int ph = 0; ph < pooled_height; ph++) {
        for (int pw = 0; pw < pooled_width; pw++) {
          int index = index_n_c + ph * pooled_width + pw;

          float output_val = 0.;
          for (int iy = 0; iy < roi_bin_grid_h; iy++) {
            for (int ix = 0; ix < roi_bin_grid_w; ix++) {
              PreCalc pc = pre_calc[pre_calc_index];
              output_val += pc.w1 * offset_input[pc.pos1] +
                            pc.w2 * offset_input[pc.pos2] +
                            pc.w3 * offset_input[pc.pos3] +
                            pc.w4 * offset_input[pc.pos4];
              pre_calc_index += 1;
            }
          }
          output_val /= count;
          output[index] = output_val;
        }  // for pw
      }    // for ph
    }      // for c
  }        // for n
}

void MMCVRoIAlignRotatedKernel::Compute(OrtKernelContext *context) {
  // Setup inputs
  const OrtValue *input_X = ort_.KernelContext_GetInput(context, 0);
  const float *X_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<float>(input_X));
  const OrtValue *input_rois = ort_.KernelContext_GetInput(context, 1);
  const float *rois = reinterpret_cast<const float *>(
      ort_.GetTensorData<const float *>(input_rois));

  // Setup output
  OrtTensorDimensions out_dimensions(ort_, input_X);
  OrtTensorDimensions roi_dimensions(ort_, input_rois);

  int batch_size = out_dimensions.data()[0];
  int input_channels = out_dimensions.data()[1];
  int input_height = out_dimensions.data()[2];
  int input_width = out_dimensions.data()[3];

  out_dimensions.data()[0] = roi_dimensions.data()[0];
  out_dimensions.data()[2] = aligned_height_;
  out_dimensions.data()[3] = aligned_width_;

  OrtValue *output = ort_.KernelContext_GetOutput(
      context, 0, out_dimensions.data(), out_dimensions.size());
  float *out = ort_.GetTensorMutableData<float>(output);
  OrtTensorTypeAndShapeInfo *output_info = ort_.GetTensorTypeAndShape(output);
  ort_.ReleaseTensorTypeAndShapeInfo(output_info);

  // TODO: forward here
  int output_size = out_dimensions.data()[0];
  for (auto i = 1; i < out_dimensions.size(); ++i) {
    output_size *= out_dimensions.data()[i];
  }

  ROIAlignRotatedForwardCPU(output_size, X_data, rois, out, spatial_scale_,
                            aligned_, clockwise_, input_channels, input_height,
                            input_width, aligned_height_, aligned_width_,
                            sampling_ratio_);
}
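A small standalone check (hypothetical values) of the rotation used above: the offset (yy, xx) from the box center is rotated by theta and then translated by the center. With theta = 90 degrees, the offset (yy, xx) = (0, 1) maps to (-1, 0) before translation:

#include <cassert>
#include <cmath>

int main() {
  const float kPi = 3.14159265f;
  const float theta = kPi / 2.0f;  // 90 degrees
  const float cos_theta = std::cos(theta), sin_theta = std::sin(theta);
  const float roi_center_h = 10.f, roi_center_w = 20.f;  // hypothetical center
  const float yy = 0.f, xx = 1.f;                        // offset from center
  // same formulas as in pre_calc_for_bilinear_interpolate:
  float y = yy * cos_theta - xx * sin_theta + roi_center_h;  // 10 - 1 = 9
  float x = yy * sin_theta + xx * cos_theta + roi_center_w;  // 20 + 0 = 20
  assert(std::fabs(y - 9.f) < 1e-5f && std::fabs(x - 20.f) < 1e-5f);
  return 0;
}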
mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#include "soft_nms.h"

#include <assert.h>

#include <algorithm>
#include <cmath>

#include "../ort_mmcv_utils.h"

SoftNmsKernel::SoftNmsKernel(OrtApi api, const OrtKernelInfo *info)
    : api_(api), ort_(api_), info_(info) {
  iou_threshold_ = ort_.KernelInfoGetAttribute<float>(info, "iou_threshold");
  sigma_ = ort_.KernelInfoGetAttribute<float>(info, "sigma");
  min_score_ = ort_.KernelInfoGetAttribute<float>(info, "min_score");
  method_ = ort_.KernelInfoGetAttribute<int64_t>(info, "method");
  offset_ = ort_.KernelInfoGetAttribute<int64_t>(info, "offset");

  // create allocator
  allocator_ = Ort::AllocatorWithDefaultOptions();
}

void SoftNmsKernel::Compute(OrtKernelContext *context) {
  typedef float T;

  const T iou_threshold = T(iou_threshold_);
  const T sigma = T(sigma_);
  const T min_score = T(min_score_);
  const int method = int(method_);
  const T offset = T(offset_);

  const OrtValue *boxes = ort_.KernelContext_GetInput(context, 0);
  const T *boxes_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<T>(boxes));
  const OrtValue *scores = ort_.KernelContext_GetInput(context, 1);
  const T *scores_data =
      reinterpret_cast<const float *>(ort_.GetTensorData<T>(scores));

  OrtTensorDimensions boxes_dim(ort_, boxes);
  OrtTensorDimensions scores_dim(ort_, scores);

  int64_t nboxes = boxes_dim[0];
  assert(boxes_dim[1] == 4);

  // allocate tmp memory
  T *tmp_boxes = (T *)allocator_.Alloc(sizeof(T) * nboxes * 4);
  T *x1 = tmp_boxes;
  T *y1 = tmp_boxes + 1;
  T *x2 = tmp_boxes + 2;
  T *y2 = tmp_boxes + 3;
  T *sc = (T *)allocator_.Alloc(sizeof(T) * nboxes);
  T *areas = (T *)allocator_.Alloc(sizeof(T) * nboxes);
  T *de = (T *)allocator_.Alloc(sizeof(T) * nboxes * 5);
  int64_t *inds = (int64_t *)allocator_.Alloc(sizeof(int64_t) * nboxes);

  memcpy(tmp_boxes, boxes_data, sizeof(T) * nboxes * 4);
  memcpy(sc, scores_data, sizeof(T) * nboxes);

  // init inds as arange(nboxes)
  std::generate(inds, inds + nboxes, [n = 0]() mutable { return n++; });

  // area = (x2-x1+offset)*(y2-y1+offset)
  for (int64_t i = 0; i < nboxes; i++) {
    areas[i] =
        (x2[i * 4] - x1[i * 4] + offset) * (y2[i * 4] - y1[i * 4] + offset);
  }

  int64_t pos = 0;

  for (int64_t i = 0; i < nboxes; i++) {
    auto max_score = sc[i];
    auto max_pos = i;

    pos = i + 1;
    // get max box
    while (pos < nboxes) {
      if (max_score < sc[pos]) {
        max_score = sc[pos];
        max_pos = pos;
      }
      pos = pos + 1;
    }
    // swap
    auto ix1 = de[i * 5 + 0] = x1[max_pos * 4];
    auto iy1 = de[i * 5 + 1] = y1[max_pos * 4];
    auto ix2 = de[i * 5 + 2] = x2[max_pos * 4];
    auto iy2 = de[i * 5 + 3] = y2[max_pos * 4];
    auto iscore = de[i * 5 + 4] = sc[max_pos];
    auto iarea = areas[max_pos];
    auto iind = inds[max_pos];
    x1[max_pos * 4] = x1[i * 4];
    y1[max_pos * 4] = y1[i * 4];
    x2[max_pos * 4] = x2[i * 4];
    y2[max_pos * 4] = y2[i * 4];
    sc[max_pos] = sc[i];
    areas[max_pos] = areas[i];
    inds[max_pos] = inds[i];
    x1[i * 4] = ix1;
    y1[i * 4] = iy1;
    x2[i * 4] = ix2;
    y2[i * 4] = iy2;
    sc[i] = iscore;
    areas[i] = iarea;
    inds[i] = iind;

    pos = i + 1;
    while (pos < nboxes) {
      auto xx1 = std::max(ix1, x1[pos * 4]);
      auto yy1 = std::max(iy1, y1[pos * 4]);
      auto xx2 = std::min(ix2, x2[pos * 4]);
      auto yy2 = std::min(iy2, y2[pos * 4]);

      auto w = std::max(0.f, xx2 - xx1 + offset);
      auto h = std::max(0.f, yy2 - yy1 + offset);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[pos] - inter);

      float weight = 1.;
      if (method == 0) {
        if (ovr >= iou_threshold) weight = 0;
      } else if (method == 1) {
        if (ovr >= iou_threshold) weight = 1 - ovr;
      } else if (method == 2) {
        weight = std::exp(-(ovr * ovr) / sigma);
      }
      sc[pos] *= weight;

      // if box score falls below threshold, discard the box by
      // swapping with last box update N
      if (sc[pos] < min_score) {
        x1[pos * 4] = x1[(nboxes - 1) * 4];
        y1[pos * 4] = y1[(nboxes - 1) * 4];
        x2[pos * 4] = x2[(nboxes - 1) * 4];
        y2[pos * 4] = y2[(nboxes - 1) * 4];
        sc[pos] = sc[nboxes - 1];
        areas[pos] = areas[nboxes - 1];
        inds[pos] = inds[nboxes - 1];
        nboxes = nboxes - 1;
        pos = pos - 1;
      }

      pos = pos + 1;
    }
  }

  std::vector<int64_t> dets_dim({nboxes, 5});
  OrtValue *dets = ort_.KernelContext_GetOutput(context, 0, dets_dim.data(),
                                                dets_dim.size());
  T *dets_data = ort_.GetTensorMutableData<T>(dets);

  std::vector<int64_t> inds_dim({nboxes});
  OrtValue *inds_ov = ort_.KernelContext_GetOutput(context, 1, inds_dim.data(),
                                                   inds_dim.size());
  int64_t *inds_data = ort_.GetTensorMutableData<int64_t>(inds_ov);

  memcpy(dets_data, de, sizeof(T) * nboxes * 5);
  memcpy(inds_data, inds, sizeof(int64_t) * nboxes);
}
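The method attribute selects among three score-decay rules: naive NMS (method == 0) zeroes any score whose IoU with the current best box reaches iou_threshold, linear soft-NMS (method == 1) scales it by 1 - IoU, and gaussian soft-NMS (method == 2) scales it by exp(-IoU^2 / sigma). A tiny worked example with hypothetical values IoU = 0.6, iou_threshold = 0.3, sigma = 0.5:

#include <cmath>
#include <cstdio>

int main() {
  const float ovr = 0.6f, iou_threshold = 0.3f, sigma = 0.5f;
  float hard = (ovr >= iou_threshold) ? 0.f : 1.f;          // method == 0
  float linear = (ovr >= iou_threshold) ? 1.f - ovr : 1.f;  // method == 1
  float gaussian = std::exp(-(ovr * ovr) / sigma);          // method == 2
  // prints: hard=0.000 linear=0.400 gaussian=0.487
  std::printf("hard=%.3f linear=%.3f gaussian=%.3f\n", hard, linear, gaussian);
  return 0;
}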
mmcv/ops/csrc/onnxruntime/deform_conv.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_DEFORM_CONV_H
#define ONNXRUNTIME_DEFORM_CONV_H

#include <onnxruntime_cxx_api.h>

struct MMCVDeformConvKernel {
  MMCVDeformConvKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  int64_t stride_height_;
  int64_t stride_width_;
  int64_t padding_height_;
  int64_t padding_width_;
  int64_t dilation_height_;
  int64_t dilation_width_;
  int64_t deformable_group_;
  int64_t group_;
  int64_t im2col_step_;
};

struct MMCVDeformConvOp
    : Ort::CustomOpBase<MMCVDeformConvOp, MMCVDeformConvKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new MMCVDeformConvKernel(api, info);
  }

  const char *GetName() const { return "MMCVDeformConv2d"; };

  size_t GetInputTypeCount() const { return 3; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  OrtCustomOpInputOutputCharacteristic GetInputCharacteristic(
      size_t index) const {
    return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED;
  }

  size_t GetOutputTypeCount() const { return 1; };
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif
mmcv/ops/csrc/onnxruntime/grid_sample.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_GRIDSAMPLE_H
#define ONNXRUNTIME_GRIDSAMPLE_H

#include <onnxruntime_cxx_api.h>

struct GridSampleKernel {
  GridSampleKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  int64_t align_corners_;
  int64_t interpolation_mode_;
  int64_t padding_mode_;
};

struct GridSampleOp : Ort::CustomOpBase<GridSampleOp, GridSampleKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new GridSampleKernel(api, info);
  };

  const char *GetName() const { return "grid_sampler"; };

  size_t GetInputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 1; };
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif
mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_MODULATED_DEFORM_CONV_H
#define ONNXRUNTIME_MODULATED_DEFORM_CONV_H

#include <onnxruntime_cxx_api.h>

struct MMCVModulatedDeformConvKernel {
  MMCVModulatedDeformConvKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  int64_t stride_height_;
  int64_t stride_width_;
  int64_t padding_height_;
  int64_t padding_width_;
  int64_t dilation_height_;
  int64_t dilation_width_;
  int64_t deformable_group_;
  int64_t group_;
};

struct MMCVModulatedDeformConvOp
    : Ort::CustomOpBase<MMCVModulatedDeformConvOp,
                        MMCVModulatedDeformConvKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new MMCVModulatedDeformConvKernel(api, info);
  }

  const char *GetName() const { return "MMCVModulatedDeformConv2d"; };

  size_t GetInputTypeCount() const { return 5; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  OrtCustomOpInputOutputCharacteristic GetInputCharacteristic(
      size_t index) const {
    // The last input (index == 4) is optional, which is bias
    if (index == 4)
      return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_OPTIONAL;

    return OrtCustomOpInputOutputCharacteristic::INPUT_OUTPUT_REQUIRED;
  }

  size_t GetOutputTypeCount() const { return 1; };
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif
mmcv/ops/csrc/onnxruntime/nms.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_NMS_H
#define ONNXRUNTIME_NMS_H

#include <onnxruntime_cxx_api.h>

struct NmsKernel {
  NmsKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  float iou_threshold_;
  int64_t offset_;
};

struct NmsOp : Ort::CustomOpBase<NmsOp, NmsKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new NmsKernel(api, info);
  };

  const char *GetName() const { return "NonMaxSuppression"; };

  size_t GetInputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 1; };
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif
mmcv/ops/csrc/onnxruntime/onnxruntime_register.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_REGISTER_H
#define ONNXRUNTIME_REGISTER_H
#include <onnxruntime_c_api.h>

#ifdef __cplusplus
extern "C" {
#endif

OrtStatus *ORT_API_CALL RegisterCustomOps(OrtSessionOptions *options,
                                          const OrtApiBase *api);

#ifdef __cplusplus
}
#endif
#endif  // ONNXRUNTIME_REGISTER_H
mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h (new file, mode 100644)

// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#ifndef ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
#define ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H

/*
 * This file defines SessionOptions Config Keys and format of the Config Values.
 *
 * The Naming Convention for a SessionOptions Config Key,
 * "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
 * Such as "ep.cuda.use_arena"
 * The Config Key cannot be empty
 * The maximum length of the Config Key is 128
 *
 * The string format of a SessionOptions Config Value is defined individually
 * for each Config. The maximum length of the Config Value is 1024
 */

// Key for disable PrePacking,
// If the config value is set to "1" then the prepacking is disabled, otherwise
// prepacking is enabled (default value)
static const char *const kOrtSessionOptionsConfigDisablePrepacking =
    "session.disable_prepacking";

// A value of "1" means allocators registered in the env will be used. "0" means
// the allocators created in the session will be used. Use this to override the
// usage of env allocators on a per session level.
static const char *const kOrtSessionOptionsConfigUseEnvAllocators =
    "session.use_env_allocators";

// Set to 'ORT' (case sensitive) to load an ORT format model.
// If unset, model type will default to ONNX unless inferred from filename
// ('.ort' == ORT format) or bytes to be ORT
static const char *const kOrtSessionOptionsConfigLoadModelFormat =
    "session.load_model_format";

// Set to 'ORT' (case sensitive) to save optimized model in ORT format when
// SessionOptions.optimized_model_path is set. If unset, format will default to
// ONNX unless optimized_model_filepath ends in '.ort'.
static const char *const kOrtSessionOptionsConfigSaveModelFormat =
    "session.save_model_format";

#endif  // ONNXRUNTIME_SESSION_OPTIONS_CONFIG_KEYS_H
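These keys are consumed through the session-options config API. A minimal usage sketch, assuming an ONNX Runtime build recent enough to expose SessionOptions::AddConfigEntry (the C++ wrapper over the C-API AddSessionConfigEntry):

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::SessionOptions session_options;
  // Key from kOrtSessionOptionsConfigDisablePrepacking above; the value "1"
  // disables weight prepacking for sessions created with these options.
  session_options.AddConfigEntry("session.disable_prepacking", "1");
  return 0;
}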
mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ORT_MMCV_UTILS_H
#define ORT_MMCV_UTILS_H
#include <onnxruntime_cxx_api.h>

#include <vector>

struct OrtTensorDimensions : std::vector<int64_t> {
  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue *value) {
    OrtTensorTypeAndShapeInfo *info = ort.GetTensorTypeAndShape(value);
    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
    ort.ReleaseTensorTypeAndShapeInfo(info);
  }
};
#endif  // ORT_MMCV_UTILS_H
mmcv/ops/csrc/onnxruntime/reduce_ops.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_REDUCE_OPS_H
#define ONNXRUNTIME_REDUCE_OPS_H

#include <onnxruntime_cxx_api.h>

struct MMCVCumMaxKernel {
 public:
  MMCVCumMaxKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    dim_ = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  Ort::AllocatorWithDefaultOptions allocator_;

  int64_t dim_;
};

struct MMCVCumMinKernel {
 public:
  MMCVCumMinKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    dim_ = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  Ort::AllocatorWithDefaultOptions allocator_;

  int64_t dim_;
};

struct MMCVCumMaxCustomOp
    : Ort::CustomOpBase<MMCVCumMaxCustomOp, MMCVCumMaxKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVCumMaxKernel(api, info);
  }

  const char *GetName() const { return "cummax"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};

struct MMCVCumMinCustomOp
    : Ort::CustomOpBase<MMCVCumMinCustomOp, MMCVCumMinKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVCumMinKernel(api, info);
  }

  const char *GetName() const { return "cummin"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};

#endif  // ONNXRUNTIME_REDUCE_OPS_H
mmcv/ops/csrc/onnxruntime/roi_align.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_H
#define ONNXRUNTIME_ROI_ALIGN_H

#include <assert.h>
#include <onnxruntime_cxx_api.h>

#include <cmath>
#include <mutex>
#include <string>
#include <vector>

struct MMCVRoiAlignKernel {
 public:
  MMCVRoiAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    aligned_ = ort_.KernelInfoGetAttribute<int64_t>(info, "aligned");
    aligned_height_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height");
    aligned_width_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_width");
    pool_mode_ = ort_.KernelInfoGetAttribute<std::string>(info, "mode");
    sampling_ratio_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;

  int aligned_height_;
  int aligned_width_;
  float spatial_scale_;
  int sampling_ratio_;
  std::string pool_mode_;
  int aligned_;
};

struct MMCVRoiAlignCustomOp
    : Ort::CustomOpBase<MMCVRoiAlignCustomOp, MMCVRoiAlignKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVRoiAlignKernel(api, info);
  }

  const char *GetName() const { return "MMCVRoiAlign"; }

  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_ROI_ALIGN_H
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_ROTATED_H
#define ONNXRUNTIME_ROI_ALIGN_ROTATED_H

#include <assert.h>
#include <onnxruntime_cxx_api.h>

#include <cmath>
#include <mutex>
#include <string>
#include <vector>

struct MMCVRoIAlignRotatedKernel {
 public:
  MMCVRoIAlignRotatedKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    aligned_height_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height");
    aligned_width_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_width");
    sampling_ratio_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
    aligned_ = ort_.KernelInfoGetAttribute<int64_t>(info, "aligned");
    clockwise_ = ort_.KernelInfoGetAttribute<int64_t>(info, "clockwise");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;

  int aligned_height_;
  int aligned_width_;
  float spatial_scale_;
  int sampling_ratio_;
  int aligned_;
  int clockwise_;
};

struct MMCVRoIAlignRotatedCustomOp
    : Ort::CustomOpBase<MMCVRoIAlignRotatedCustomOp, MMCVRoIAlignRotatedKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVRoIAlignRotatedKernel(api, info);
  }

  const char *GetName() const { return "MMCVRoIAlignRotated"; }

  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif  // ONNXRUNTIME_ROI_ALIGN_ROTATED_H
mmcv/ops/csrc/onnxruntime/soft_nms.h (new file, mode 100644)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_SOFT_NMS_H
#define ONNXRUNTIME_SOFT_NMS_H

#include <onnxruntime_cxx_api.h>

struct SoftNmsKernel {
  SoftNmsKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;
  Ort::CustomOpApi ort_;
  const OrtKernelInfo *info_;
  Ort::AllocatorWithDefaultOptions allocator_;

  float iou_threshold_;
  float sigma_;
  float min_score_;
  int64_t method_;
  int64_t offset_;
};

struct SoftNmsOp : Ort::CustomOpBase<SoftNmsOp, SoftNmsKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new SoftNmsKernel(api, info);
  };

  const char *GetName() const { return "SoftNonMaxSuppression"; };

  size_t GetInputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    if (index == 1) {
      return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    }
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif  // ONNXRUNTIME_SOFT_NMS_H
mmcv/ops/csrc/parrots/active_rotated_filter.cpp (deleted, mode 100644 → 0)

// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void active_rotated_filter_forward_impl(const Tensor input,
                                        const Tensor indices, Tensor output) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices,
                       output);
}

void active_rotated_filter_backward_impl(const Tensor grad_out,
                                         const Tensor indices,
                                         Tensor grad_in) {
  DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices,
                       grad_in);
}

void active_rotated_filter_forward(const Tensor input, const Tensor indices,
                                   Tensor output) {
  active_rotated_filter_forward_impl(input, indices, output);
}

void active_rotated_filter_backward(const Tensor grad_out,
                                    const Tensor indices, Tensor grad_in) {
  active_rotated_filter_backward_impl(grad_out, indices, grad_in);
}
mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp (deleted, mode 100644 → 0)

// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>

#include "active_rotated_filter_pytorch.h"

using namespace parrots;

#ifdef MMCV_WITH_CUDA
void active_rotated_filter_forward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  auto input = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  active_rotated_filter_forward(input, indices, output);
}

void active_rotated_filter_backward_cuda_parrots(
    CudaContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  auto grad_out = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto grad_in = buildATensor(ctx, outs[0]);
  active_rotated_filter_backward(grad_out, indices, grad_in);
}
#endif

void active_rotated_filter_forward_cpu_parrots(
    HostContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  auto input = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto output = buildATensor(ctx, outs[0]);
  active_rotated_filter_forward(input, indices, output);
}

void active_rotated_filter_backward_cpu_parrots(
    HostContext& ctx, const SSElement& attr,
    const OperatorBase::in_list_t& ins, OperatorBase::out_list_t& outs) {
  auto grad_out = buildATensor(ctx, ins[0]);
  auto indices = buildATensor(ctx, ins[1]);
  auto grad_in = buildATensor(ctx, outs[0]);
  active_rotated_filter_backward(grad_out, indices, grad_in);
}

PARROTS_EXTENSION_REGISTER(active_rotated_filter_forward)
    .input(2)
    .output(1)
    .apply(active_rotated_filter_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(active_rotated_filter_forward_cuda_parrots)
#endif
    .done();

PARROTS_EXTENSION_REGISTER(active_rotated_filter_backward)
    .input(2)
    .output(1)
    .apply(active_rotated_filter_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
    .apply(active_rotated_filter_backward_cuda_parrots)
#endif
    .done();
mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h (deleted, mode 100644 → 0)

// Copyright (c) OpenMMLab. All rights reserved
#ifndef ACTIVE_ROTATED_FILTER_PYTORCH_H
#define ACTIVE_ROTATED_FILTER_PYTORCH_H
#include <torch/extension.h>
using namespace at;

void active_rotated_filter_forward(const Tensor input, const Tensor indices,
                                   Tensor output);

void active_rotated_filter_backward(const Tensor grad_out,
                                    const Tensor indices, Tensor grad_in);

#endif  // ACTIVE_ROTATED_FILTER_PYTORCH_H
mmcv/ops/csrc/parrots/assign_score_withk.cpp (modified: +58 −15; reconstructed inline diff, removed lines prefixed "-", added lines "+")

 // Modified from
 // https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
-                                     int aggregate, const Tensor& points,
-                                     const Tensor& centers, const Tensor& scores,
-                                     const Tensor& knn_idx, Tensor& output) {
-  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
-                       aggregate, points, centers, scores, knn_idx, output);
-}
+
+#ifdef MMCV_WITH_CUDA
+void AssignScoreWithKForwardCUDAKernelLauncher(
+    int B, int N0, int N1, int M, int K, int O, int aggregate,
+    const Tensor& points, const Tensor& centers, const Tensor& scores,
+    const Tensor& knn_idx, Tensor& output);
+
+void assign_score_withk_forward_cuda(
+    int B, int N0, int N1, int M, int K, int O, int aggregate,
+    const Tensor& points, const Tensor& centers, const Tensor& scores,
+    const Tensor& knn_idx, Tensor& output) {
+  AssignScoreWithKForwardCUDAKernelLauncher(
+      B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output);
+};
+
+void AssignScoreWithKBackwardCUDAKernelLauncher(
+    int B, int N0, int N1, int M, int K, int O, int aggregate,
+    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
+    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
+    Tensor& grad_centers, Tensor& grad_scores);
+
-void assign_score_withk_backward_impl(
+void assign_score_withk_backward_cuda(
     int B, int N0, int N1, int M, int K, int O, int aggregate,
     const Tensor& grad_out, const Tensor& points, const Tensor& centers,
     const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
     Tensor& grad_centers, Tensor& grad_scores) {
-  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
-                       aggregate, grad_out, points, centers, scores, knn_idx,
-                       grad_points, grad_centers, grad_scores);
-}
+  AssignScoreWithKBackwardCUDAKernelLauncher(
+      B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores,
+      knn_idx, grad_points, grad_centers, grad_scores);
+};
+#endif

 void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
                                 const Tensor& scores, const Tensor& knn_idx,
                                 Tensor& output, int B, int N0, int N1, int M,
                                 int K, int O, int aggregate) {
-  assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points,
-                                  centers, scores, knn_idx, output);
+  if (points.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CONTIGUOUS(points);
+    CHECK_CONTIGUOUS(centers);
+    CHECK_CONTIGUOUS(scores);
+    CHECK_CONTIGUOUS(knn_idx);
+    CHECK_CONTIGUOUS(output);
+
+    assign_score_withk_forward_cuda(B, N0, N1, M, K, O, aggregate, points,
+                                    centers, scores, knn_idx, output);
+#else
+    AT_ERROR("assign_score_withk is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("assign_score_withk is not implemented on CPU");
+  }
 }

 void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
 ...
@@ -36,7 +62,24 @@ void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
                                  Tensor& grad_centers, Tensor& grad_scores,
                                  int B, int N0, int N1, int M, int K, int O,
                                  int aggregate) {
-  assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out,
-                                   points, centers, scores, knn_idx,
-                                   grad_points, grad_centers, grad_scores);
+  if (grad_points.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CONTIGUOUS(grad_out);
+    CHECK_CONTIGUOUS(scores);
+    CHECK_CONTIGUOUS(points);
+    CHECK_CONTIGUOUS(centers);
+    CHECK_CONTIGUOUS(knn_idx);
+    CHECK_CONTIGUOUS(grad_scores);
+    CHECK_CONTIGUOUS(grad_points);
+    CHECK_CONTIGUOUS(grad_centers);
+
+    assign_score_withk_backward_cuda(B, N0, N1, M, K, O, aggregate, grad_out,
+                                     points, centers, scores, knn_idx,
+                                     grad_points, grad_centers, grad_scores);
+#else
+    AT_ERROR("assign_score_withk is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("assign_score_withk is not implemented on CPU");
+  }
 }
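The updated wrappers route explicitly by device instead of going through the removed DISPATCH_DEVICE_IMPL registry: check the tensor's device, forward CUDA tensors to the CUDA entry point, and fail loudly otherwise. A minimal standalone sketch of that routing pattern (the FakeTensor type and exception-based errors are toy stand-ins for MMCV's Tensor and AT_ERROR, not the real helpers):

#include <stdexcept>

struct FakeTensor {  // stand-in for at::Tensor
  bool on_cuda;
  bool is_cuda() const { return on_cuda; }
};

void op_cuda(const FakeTensor &) { /* the CUDA kernel launch would go here */ }

void op(const FakeTensor &t) {
  if (t.is_cuda()) {
#ifdef MMCV_WITH_CUDA
    op_cuda(t);
#else
    throw std::runtime_error("op is not compiled with GPU support");
#endif
  } else {
    throw std::runtime_error("op is not implemented on CPU");
  }
}

int main() {
  FakeTensor t{true};
  try {
    op(t);  // routes to op_cuda only when built with -DMMCV_WITH_CUDA
  } catch (const std::runtime_error &) {
    // expected in a CPU-only build
  }
  return 0;
}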