Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
MMCV
Commits
2e5628b4
You need to sign in or sign up before continuing.
Unverified
Commit
2e5628b4
authored
Aug 26, 2022
by
q.yao
Committed by
GitHub
Aug 26, 2022
Browse files
[Refactor]: Remove deployment for dev-2.x (#2225)
* remove deploy for 2.0 * update onnx ut
parent
961373ad
Changes
94
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
3468 deletions
+0
-3468
mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h
mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h
+0
-15
mmcv/ops/csrc/onnxruntime/reduce_ops.h
mmcv/ops/csrc/onnxruntime/reduce_ops.h
+0
-95
mmcv/ops/csrc/onnxruntime/roi_align.h
mmcv/ops/csrc/onnxruntime/roi_align.h
+0
-62
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h
+0
-62
mmcv/ops/csrc/onnxruntime/rotated_feature_align.h
mmcv/ops/csrc/onnxruntime/rotated_feature_align.h
+0
-50
mmcv/ops/csrc/onnxruntime/soft_nms.h
mmcv/ops/csrc/onnxruntime/soft_nms.h
+0
-49
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
+0
-217
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu
+0
-110
mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
+0
-91
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
+0
-242
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu
+0
-90
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
+0
-318
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu
+0
-129
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
+0
-256
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu
+0
-441
mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
+0
-246
mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp
+0
-308
mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu
...csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu
+0
-134
mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp
mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp
+0
-279
mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu
mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu
+0
-274
No files found.
mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ORT_MMCV_UTILS_H
#define ORT_MMCV_UTILS_H
#include <onnxruntime_cxx_api.h>

#include <vector>

// Helper that reads the shape of an ORT tensor into itself.
// Inherits std::vector<int64_t> so the dimensions can be passed anywhere a
// plain shape vector is expected.
struct OrtTensorDimensions : std::vector<int64_t> {
  OrtTensorDimensions(Ort::CustomOpApi ort, const OrtValue* value) {
    // Query the type/shape info, copy the shape into this vector, then
    // release the info object so it does not leak.
    OrtTensorTypeAndShapeInfo* info = ort.GetTensorTypeAndShape(value);
    std::vector<int64_t>::operator=(ort.GetTensorShape(info));
    ort.ReleaseTensorTypeAndShapeInfo(info);
  }
};
#endif  // ORT_MMCV_UTILS_H
mmcv/ops/csrc/onnxruntime/reduce_ops.h
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_REDUCE_OPS_H
#define ONNXRUNTIME_REDUCE_OPS_H
#include <onnxruntime_cxx_api.h>
// CPU kernel backing the ONNX Runtime custom op "cummax".
// The scan dimension is read once from the node attribute "dim"; the actual
// computation is in Compute(), defined elsewhere.
struct MMCVCumMaxKernel {
 public:
  MMCVCumMaxKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    dim_ = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;                        // ORT custom-op API handle
  Ort::AllocatorWithDefaultOptions allocator_;  // default allocator
  int64_t dim_;                                 // dimension to scan along
};
// CPU kernel backing the ONNX Runtime custom op "cummin".
// Mirrors MMCVCumMaxKernel; the scan dimension comes from attribute "dim"
// and the computation is in Compute(), defined elsewhere.
struct MMCVCumMinKernel {
 public:
  MMCVCumMinKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    dim_ = ort_.KernelInfoGetAttribute<int64_t>(info, "dim");

    // create allocator
    allocator_ = Ort::AllocatorWithDefaultOptions();
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;                        // ORT custom-op API handle
  Ort::AllocatorWithDefaultOptions allocator_;  // default allocator
  int64_t dim_;                                 // dimension to scan along
};
// Registration descriptor for "cummax": one float input, two outputs
// (float running values, int64 indices). Pinned to the CPU provider.
struct MMCVCumMaxCustomOp
    : Ort::CustomOpBase<MMCVCumMaxCustomOp, MMCVCumMaxKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVCumMaxKernel(api, info);
  }

  const char *GetName() const { return "cummax"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    // output[1] carries the indices; output[0] the values.
    if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
// Registration descriptor for "cummin": one float input, two outputs
// (float running values, int64 indices). Pinned to the CPU provider.
struct MMCVCumMinCustomOp
    : Ort::CustomOpBase<MMCVCumMinCustomOp, MMCVCumMinKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVCumMinKernel(api, info);
  }

  const char *GetName() const { return "cummin"; }

  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    // output[1] carries the indices; output[0] the values.
    if (index == 1) return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif // ONNXRUNTIME_REDUCE_OPS_H
mmcv/ops/csrc/onnxruntime/roi_align.h
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_H
#define ONNXRUNTIME_ROI_ALIGN_H
#include <assert.h>
#include <onnxruntime_cxx_api.h>
#include <cmath>
#include <mutex>
#include <string>
#include <vector>
// CPU kernel for the "MMCVRoiAlign" custom op. All node attributes are read
// once at construction; the pooling itself runs in Compute(), defined
// elsewhere.
struct MMCVRoiAlignKernel {
 public:
  MMCVRoiAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    aligned_ = ort_.KernelInfoGetAttribute<int64_t>(info, "aligned");
    aligned_height_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height");
    aligned_width_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_width");
    pool_mode_ = ort_.KernelInfoGetAttribute<std::string>(info, "mode");
    sampling_ratio_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  int aligned_height_;   // pooled output height
  int aligned_width_;    // pooled output width
  float spatial_scale_;  // scale from RoI coords to feature-map coords
  int sampling_ratio_;   // sampling points per bin — semantics in Compute()
  std::string pool_mode_;  // pooling mode string; presumably "avg"/"max" —
                           // verify against Compute()
  int aligned_;            // pixel-alignment flag
};
// Registration descriptor for "MMCVRoiAlign": two float inputs
// (features, rois), one float output. Pinned to the CPU provider.
struct MMCVRoiAlignCustomOp
    : Ort::CustomOpBase<MMCVRoiAlignCustomOp, MMCVRoiAlignKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVRoiAlignKernel(api, info);
  }

  const char *GetName() const { return "MMCVRoiAlign"; }

  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif // ONNXRUNTIME_ROI_ALIGN_H
mmcv/ops/csrc/onnxruntime/roi_align_rotated.h
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_ROI_ALIGN_ROTATED_H
#define ONNXRUNTIME_ROI_ALIGN_ROTATED_H
#include <assert.h>
#include <onnxruntime_cxx_api.h>
#include <cmath>
#include <mutex>
#include <string>
#include <vector>
// CPU kernel for the "MMCVRoIAlignRotated" custom op. Node attributes are
// read once at construction; pooling happens in Compute(), defined
// elsewhere.
struct MMCVRoIAlignRotatedKernel {
 public:
  MMCVRoIAlignRotatedKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    aligned_height_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "output_height");
    aligned_width_ = ort_.KernelInfoGetAttribute<int64_t>(info, "output_width");
    sampling_ratio_ =
        ort_.KernelInfoGetAttribute<int64_t>(info, "sampling_ratio");
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
    aligned_ = ort_.KernelInfoGetAttribute<int64_t>(info, "aligned");
    clockwise_ = ort_.KernelInfoGetAttribute<int64_t>(info, "clockwise");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  int aligned_height_;   // pooled output height
  int aligned_width_;    // pooled output width
  float spatial_scale_;  // scale from RoI coords to feature-map coords
  int sampling_ratio_;   // sampling points per bin — semantics in Compute()
  int aligned_;          // pixel-alignment flag
  int clockwise_;        // rotation-direction flag
};
// Registration descriptor for "MMCVRoIAlignRotated": two float inputs,
// one float output. Pinned to the CPU provider.
struct MMCVRoIAlignRotatedCustomOp
    : Ort::CustomOpBase<MMCVRoIAlignRotatedCustomOp, MMCVRoIAlignRotatedKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVRoIAlignRotatedKernel(api, info);
  }

  const char *GetName() const { return "MMCVRoIAlignRotated"; }

  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif // ONNXRUNTIME_ROI_ALIGN_ROTATED_H
mmcv/ops/csrc/onnxruntime/rotated_feature_align.h
deleted
100644 → 0
View file @
961373ad
#ifndef ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H
#define ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H
#include <onnxruntime_cxx_api.h>
#include <cmath>
// CPU kernel for the "MMCVRotatedFeatureAlign" custom op. Reads its two
// node attributes at construction; the alignment itself runs in Compute(),
// defined elsewhere.
struct MMCVRotatedFeatureAlignKernel {
 public:
  MMCVRotatedFeatureAlignKernel(Ort::CustomOpApi ort, const OrtKernelInfo *info)
      : ort_(ort) {
    spatial_scale_ = ort_.KernelInfoGetAttribute<float>(info, "spatial_scale");
    points_ = ort_.KernelInfoGetAttribute<int64_t>(info, "points");
  }

  void Compute(OrtKernelContext *context);

 private:
  Ort::CustomOpApi ort_;
  float spatial_scale_;  // scale from box coords to feature-map coords
  int points_;           // number of sample points — semantics in Compute()
};
// Registration descriptor for "MMCVRotatedFeatureAlign": two float inputs,
// one float output. Pinned to the CPU provider.
struct MMCVRotatedFeatureAlignCustomOp
    : Ort::CustomOpBase<MMCVRotatedFeatureAlignCustomOp,
                        MMCVRotatedFeatureAlignKernel> {
  void *CreateKernel(Ort::CustomOpApi api, const OrtKernelInfo *info) const {
    return new MMCVRotatedFeatureAlignKernel(api, info);
  }

  const char *GetName() const { return "MMCVRotatedFeatureAlign"; }

  size_t GetInputTypeCount() const { return 2; }
  ONNXTensorElementDataType GetInputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  }
};
#endif // ONNXRUNTIME_ROTATED_FEATURE_ALIGN_H
mmcv/ops/csrc/onnxruntime/soft_nms.h
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ONNXRUNTIME_SOFT_NMS_H
#define ONNXRUNTIME_SOFT_NMS_H
#include <onnxruntime_cxx_api.h>
// CPU kernel for the "SoftNonMaxSuppression" custom op. Unlike the other
// kernels in this directory its constructor is defined out-of-line, and it
// keeps the raw OrtApi alongside the C++ wrapper.
struct SoftNmsKernel {
  SoftNmsKernel(OrtApi api, const OrtKernelInfo *info);

  void Compute(OrtKernelContext *context);

 protected:
  OrtApi api_;                // raw C API table
  Ort::CustomOpApi ort_;      // C++ wrapper over api_
  const OrtKernelInfo *info_; // kernel info kept for later attribute access
  Ort::AllocatorWithDefaultOptions allocator_;

  float iou_threshold_;  // IoU above which scores are decayed
  float sigma_;          // gaussian decay parameter
  float min_score_;      // boxes decayed below this score are dropped
  int64_t method_;       // decay-method selector — semantics in Compute()
  int64_t offset_;       // box-size offset — presumably 0 or 1; verify
};
// Registration descriptor for "SoftNonMaxSuppression": two float inputs
// (boxes, scores), two outputs (float dets, int64 indices). Pinned to the
// CPU provider. Note CreateKernel takes the raw OrtApi, matching
// SoftNmsKernel's constructor.
struct SoftNmsOp : Ort::CustomOpBase<SoftNmsOp, SoftNmsKernel> {
  void *CreateKernel(OrtApi api, const OrtKernelInfo *info) const {
    return new SoftNmsKernel(api, info);
  };

  const char *GetName() const { return "SoftNonMaxSuppression"; };

  size_t GetInputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  size_t GetOutputTypeCount() const { return 2; };
  ONNXTensorElementDataType GetOutputType(size_t index) const {
    // output[1] carries indices; output[0] carries float results.
    if (index == 1) {
      return ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64;
    }
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  };

  // force cpu
  const char *GetExecutionProviderType() const {
    return "CPUExecutionProvider";
  };
};
#endif // ONNXRUNTIME_SOFT_NMS_H
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_corner_pool.hpp"
#include <assert.h>
#include "trt_serialize.hpp"
// Forward declaration of the CUDA launcher implemented in
// trt_corner_pool_kernel.cu.
void CornerPoolForwardLauncher_float(const float *input, float *output,
                                     const int batch_size, const int channels,
                                     const int height, const int width,
                                     const int pool_type, cudaStream_t stream);

namespace {
// Identifiers used when registering the plugin with TensorRT.
static const char *PLUGIN_VERSION{"1"};
static const char *CORNER_POOL_PLUGIN_NAME{"MMCVCornerPool"};
}  // namespace
// Construct from explicit attributes (network-building path).
CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string &name,
                                                 TRT_CORNER_POOL_TYPE poolType)
    : mLayerName(name), mPoolType(poolType) {}

// Construct by deserializing a previously serialized engine blob; the read
// order must match serialize().
CornerPoolPluginDynamic::CornerPoolPluginDynamic(const std::string name,
                                                 const void *data,
                                                 size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mPoolType);
}

CornerPoolPluginDynamic::~CornerPoolPluginDynamic() {}
// Deep-copy the plugin, preserving its namespace.
nvinfer1::IPluginV2DynamicExt *CornerPoolPluginDynamic::clone() const {
  CornerPoolPluginDynamic *plugin =
      new CornerPoolPluginDynamic(mLayerName, mPoolType);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Corner pooling is shape-preserving: the output has the input's dims.
nvinfer1::DimsExprs CornerPoolPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  return inputs[0];
}

// Accept linear FP32 input; the output must match the input type/format.
bool CornerPoolPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  switch (pos) {
    // input[0]
    case 0:
      return inOut[pos].type == nvinfer1::DataType::kFLOAT &&
             inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
    // output[0]
    case 1:
      return inOut[pos].type == inOut[0].type &&
             inOut[pos].format == inOut[0].format;
    default:
      return false;
  }
}

// No configuration state needs to be captured before enqueue().
void CornerPoolPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}
// Report the scratch-workspace requirement for enqueue(). The corner-pool
// launcher writes directly into the output tensor and uses no scratch
// buffer, so no workspace is needed.
size_t CornerPoolPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  // Fix: the original computed an element size into an unused local and
  // then fell off the end of a non-void function — undefined behavior and
  // a garbage workspace size. enqueue() uses no workspace, so return 0.
  return 0;
}
// Launch the corner-pooling CUDA kernel on the bound stream. The single
// input/output pair is NCHW float; mPoolType selects the pool direction.
int CornerPoolPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                     const nvinfer1::PluginTensorDesc *outputDesc,
                                     const void *const *inputs,
                                     void *const *outputs, void *workSpace,
                                     cudaStream_t stream) {
  // Unpack the NCHW extents of the sole input tensor.
  const int num = inputDesc[0].dims.d[0];
  const int chans = inputDesc[0].dims.d[1];
  const int rows = inputDesc[0].dims.d[2];
  const int cols = inputDesc[0].dims.d[3];

  CornerPoolForwardLauncher_float((float *)inputs[0], (float *)outputs[0], num,
                                  chans, rows, cols, int(mPoolType), stream);

  return 0;
}

// The output data type always mirrors the input's.
nvinfer1::DataType CornerPoolPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}
// IPluginV2 Methods

// All four pool directions share the single registered plugin name.
const char *CornerPoolPluginDynamic::getPluginType() const {
  switch (mPoolType) {
    case TRT_CORNER_POOL_TYPE::TRT_TOP_POOL:
    case TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL:
    case TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL:
    case TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL:
      return CORNER_POOL_PLUGIN_NAME;
    default:
      return "UnknownpoolType";
  }
}

const char *CornerPoolPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int CornerPoolPluginDynamic::getNbOutputs() const { return 1; }

int CornerPoolPluginDynamic::initialize() { return 0; }

void CornerPoolPluginDynamic::terminate() {}

// Only the pool type needs to survive engine serialization.
size_t CornerPoolPluginDynamic::getSerializationSize() const {
  return sizeof(mPoolType);
}

void CornerPoolPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mPoolType);
}

void CornerPoolPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

void CornerPoolPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *CornerPoolPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
// Creator registered with the TensorRT plugin registry. Exposes the single
// "mode" field and builds/restores CornerPoolPluginDynamic instances.
CornerPoolPluginDynamicCreator::CornerPoolPluginDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("mode"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *CornerPoolPluginDynamicCreator::getPluginName() const {
  return CORNER_POOL_PLUGIN_NAME;
}

const char *CornerPoolPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

const nvinfer1::PluginFieldCollection *
CornerPoolPluginDynamicCreator::getFieldNames() {
  return &mFC;
}

// Build a plugin from the parsed "mode" field; mode 0..3 selects
// top / bottom / left / right pooling respectively.
nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int mode = -1;
  for (int field_idx = 0; field_idx < fc->nbFields; ++field_idx) {
    const nvinfer1::PluginField &field = fc->fields[field_idx];
    if (field.data == nullptr) {
      continue;
    }
    if (std::string(field.name) == "mode") {
      mode = static_cast<const int *>(field.data)[0];
    }
  }

  assert(mode >= 0 && mode <= 3);

  TRT_CORNER_POOL_TYPE pool_type;
  switch (mode) {
    case 0:
      pool_type = TRT_CORNER_POOL_TYPE::TRT_TOP_POOL;
      break;
    case 1:
      pool_type = TRT_CORNER_POOL_TYPE::TRT_BOTTOM_POOL;
      break;
    case 2:
      pool_type = TRT_CORNER_POOL_TYPE::TRT_LEFT_POOL;
      break;
    case 3:
      pool_type = TRT_CORNER_POOL_TYPE::TRT_RIGHT_POOL;
      break;
    default:
      break;
  }

  auto *created = new CornerPoolPluginDynamic(name, pool_type);
  created->setPluginNamespace(getPluginNamespace());
  return created;
}

// Rebuild a plugin from its serialized engine representation.
nvinfer1::IPluginV2 *CornerPoolPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // This object will be deleted when the network is destroyed, which will
  // call FCPluginDynamic::destroy()
  auto *restored = new CornerPoolPluginDynamic(name, serialData, serialLength);
  restored->setPluginNamespace(getPluginNamespace());
  return restored;
}

void CornerPoolPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *CornerPoolPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
// Vertical (top/bottom) corner pooling over an NCHW tensor.
// One logical thread per (batch, channel, column) — nthreads = N*C*W; each
// thread seeds one border row, then scans sequentially along height,
// propagating the running max. pool_type 0 = TopPool (scan bottom-up),
// otherwise BottomPool (scan top-down). Output has the input's shape.
template <typename scalar_t>
__global__ void top_bottom_pool_kernel(const scalar_t *input, scalar_t *output,
                                       const int batch_size, const int channels,
                                       const int height, const int width,
                                       const int pool_type) {
  const int nthreads = batch_size * channels * width;
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n_idx = index / (channels * width);  // batch
    int w_idx = index % width;               // width
    int c_idx = (index / width) % channels;  // channels
    int offset_n = n_idx * channels * width * height;
    int offset_n_c = offset_n + c_idx * width * height;
    int direction = -1;            // in [-1, 1], default for TopPool
    int index_start = height - 2;  // default for TopPool
    // pool_type in [0, 1]
    if (pool_type == 0) {
      // TopPool
      // directly copy the most bottom value from input to output
      output[offset_n_c + (height - 1) * width + w_idx] =
          input[offset_n_c + (height - 1) * width + w_idx];
    } else {
      // BottomPool
      // directly copy the most top value from input to output
      output[offset_n_c + w_idx] = input[offset_n_c + w_idx];
      index_start = 1;
      direction = 1;
    }
    // do pool: each row takes the max of the input row and the
    // previously-written neighbor row (sequential dependency along h).
    for (int h = index_start; h >= 0 && h < height; h += direction) {
      output[offset_n_c + h * width + w_idx] =
          max(output[offset_n_c + (h - direction) * width + w_idx],
              input[offset_n_c + h * width + w_idx]);
    }
  }
}
// Horizontal (left/right) corner pooling over an NCHW tensor.
// One logical thread per (batch, channel, row) — nthreads = N*C*H; each
// thread seeds one border column, then scans sequentially along width,
// propagating the running max. pool_type 2 = LeftPool (scan right-to-left),
// otherwise RightPool (scan left-to-right). Output has the input's shape.
template <typename scalar_t>
__global__ void left_right_pool_kernel(const scalar_t *input, scalar_t *output,
                                       const int batch_size, const int channels,
                                       const int height, const int width,
                                       const int pool_type) {
  const int nthreads = batch_size * channels * height;
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int n_idx = index / (channels * height);  // batch
    int h_idx = index % height;               // height
    int c_idx = (index / height) % channels;  // channels
    int offset_n = n_idx * channels * width * height;
    int offset_n_c = offset_n + c_idx * width * height;
    int offset_n_c_h = offset_n_c + h_idx * width;
    int direction = -1;           // in [-1, 1], default for LeftPool
    int index_start = width - 2;  // default for LeftPool
    // pool_type in [2, 3]
    if (pool_type == 2) {
      // LeftPool
      // directly copy the most right value from input to output
      output[offset_n_c_h + width - 1] = input[offset_n_c_h + width - 1];
    } else {
      // RightPool
      // directly copy the most left value from input to output
      output[offset_n_c_h] = input[offset_n_c_h];
      index_start = 1;
      direction = 1;
    }
    // do pool: each column takes the max of the input value and the
    // previously-written neighbor column (sequential dependency along w).
    for (int w = index_start; w >= 0 && w < width; w += direction) {
      output[offset_n_c_h + w] =
          max(output[offset_n_c_h + w - direction], input[offset_n_c_h + w]);
    }
  }
}
// Host-side dispatcher: selects the kernel matching pool_type and derives
// the launch configuration from the number of independent scan lines
// (columns for top/bottom, rows for left/right). Unknown pool types launch
// nothing.
template <typename scalar_t>
void CornerPoolForwardLauncher(const scalar_t *input, scalar_t *output,
                               const int batch_size, const int channels,
                               const int height, const int width,
                               const int pool_type, cudaStream_t stream) {
  int nthreads = -1, col_block = -1;

  switch (pool_type) {
    case 0:  // TopPool
    case 1:  // BottomPool
      nthreads = batch_size * channels * width;
      col_block = GET_BLOCKS(nthreads, THREADS_PER_BLOCK);
      top_bottom_pool_kernel<scalar_t>
          <<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
              input, output, batch_size, channels, height, width, pool_type);
      break;
    case 2:  // LeftPool
    case 3:  // RightPool
      nthreads = batch_size * channels * height;
      col_block = GET_BLOCKS(nthreads, THREADS_PER_BLOCK);
      left_right_pool_kernel<scalar_t>
          <<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
              input, output, batch_size, channels, height, width, pool_type);
      break;
  }
}
// Non-templated float entry point used by the TensorRT plugin
// (trt_corner_pool.cpp); keeps the template out of the plugin's
// translation unit.
void CornerPoolForwardLauncher_float(const float *input, float *output,
                                     const int batch_size, const int channels,
                                     const int height, const int width,
                                     const int pool_type, cudaStream_t stream) {
  CornerPoolForwardLauncher<float>(input, output, batch_size, channels, height,
                                   width, pool_type, stream);
}
mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include <cublas_v2.h>
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
using mmcv::TensorDesc;

// Permute-copy kernel: each destination element index is decomposed with
// the dst stride table, then re-accumulated through the permutation into a
// source offset. One logical thread per destination element (iteration via
// CUDA_1D_KERNEL_LOOP). The TensorDesc structs are passed by value as
// kernel parameters.
template <class scalar_t>
__global__ void copy_permute_kernel(scalar_t *dst, const scalar_t *src, int n,
                                    TensorDesc ts_src_stride,
                                    TensorDesc ts_dst_stride,
                                    TensorDesc ts_permute) {
  const int src_dim = ts_src_stride.dim;
  int *src_stride = &(ts_src_stride.stride[0]);
  int *dst_stride = &(ts_dst_stride.stride[0]);
  int *permute = &(ts_permute.shape[0]);
  CUDA_1D_KERNEL_LOOP(index, n) {
    size_t dst_index = index;
    size_t src_index = 0;
    // Peel off one coordinate per dimension and map it through permute[]
    // onto the corresponding source stride.
    for (int i = 0; i < src_dim; ++i) {
      int dim_index = dst_index / dst_stride[i];
      dst_index = dst_index % dst_stride[i];
      src_index += dim_index * src_stride[permute[i]];
    }
    dst[index] = src[src_index];
  }
}
// Host wrapper: builds row-major stride tables for the source and for the
// permuted destination shape, then launches copy_permute_kernel.
// src_size and permute each hold src_dim ints; dst must already hold the
// same total element count as src.
template <class scalar_t>
void memcpyPermute(scalar_t *dst, const scalar_t *src, int *src_size,
                   int *permute, int src_dim, cudaStream_t stream) {
  size_t copy_size = 1;
  TensorDesc ts_permute;
  memcpy(&(ts_permute.shape[0]), permute, src_dim * sizeof(int));

  TensorDesc ts_src_stride;
  TensorDesc ts_dst_stride;
  ts_src_stride.dim = src_dim;
  ts_dst_stride.dim = src_dim;
  int *src_stride = &(ts_src_stride.stride[0]);
  int *dst_stride = &(ts_dst_stride.stride[0]);
  int *dst_size = &(ts_dst_stride.shape[0]);
  src_stride[src_dim - 1] = 1;
  dst_stride[src_dim - 1] = 1;

  // First pass: permuted destination shape and row-major source strides.
  for (int i = src_dim - 1; i >= 0; --i) {
    dst_size[i] = src_size[permute[i]];
    if (i < src_dim - 1) {
      src_stride[i] = src_stride[i + 1] * src_size[i + 1];
    }
  }

  // Second pass: row-major destination strides plus total element count.
  for (int i = src_dim - 1; i >= 0; --i) {
    copy_size *= dst_size[i];
    if (i < src_dim - 1) {
      dst_stride[i] = dst_stride[i + 1] * dst_size[i + 1];
    }
  }

  copy_permute_kernel<scalar_t>
      <<<GET_BLOCKS(copy_size), THREADS_PER_BLOCK, 0, stream>>>(
          dst, src, copy_size, ts_src_stride, ts_dst_stride, ts_permute);
}

// Explicit instantiation: only the float version is exported.
template void memcpyPermute<float>(float *dst, const float *src, int *src_size,
                                   int *permute, int src_dim,
                                   cudaStream_t stream);
// cublasGemmWrap specialization for float: forwards to cublasSgemm.
// Parameters follow the cuBLAS GEMM convention (column-major storage).
template <>
cublasStatus_t cublasGemmWrap<float>(cublasHandle_t handle,
                                     cublasOperation_t transa,
                                     cublasOperation_t transb, int m, int n,
                                     int k, const float *alpha, const float *A,
                                     int lda, const float *B, int ldb,
                                     const float *beta, float *C, int ldc) {
  return cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}
// cublasGemmWrap specialization for half precision: forwards to
// cublasHgemm. Parameters follow the cuBLAS GEMM convention
// (column-major storage).
template <>
cublasStatus_t cublasGemmWrap<half>(cublasHandle_t handle,
                                    cublasOperation_t transa,
                                    cublasOperation_t transb, int m, int n,
                                    int k, const half *alpha, const half *A,
                                    int lda, const half *B, int ldb,
                                    const half *beta, half *C, int ldc) {
  return cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb,
                     beta, C, ldc);
}
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_cummaxmin.hpp"
#include <assert.h>
#include "trt_serialize.hpp"
// CUDA launchers implemented in trt_cummaxmin_kernel.cu. cum_dim is the
// scan dimension, cum_type selects cummax vs cummin, and output_index
// receives the arg indices.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream);

namespace {
// Registry identifiers: a shared creator name plus per-op plugin names.
static const char *PLUGIN_VERSION{"1"};
static const char *CUMMAXMIN_PLUGIN_NAME{"cummaxmin"};
static const char *CUMMAX_PLUGIN_NAME{"cummax"};
static const char *CUMMIN_PLUGIN_NAME{"cummin"};
}  // namespace
// Construct from explicit attributes (network-building path).
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string &name, int dim,
                                               TRT_CUMCMPTYPE cumType)
    : mLayerName(name), mDim(dim), mCumType(cumType) {}

// Construct by deserializing an engine blob; the read order must match
// serialize().
CumMaxMinPluginDynamic::CumMaxMinPluginDynamic(const std::string name,
                                               const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mDim);
  deserialize_value(&data, &length, &mCumType);
}

CumMaxMinPluginDynamic::~CumMaxMinPluginDynamic() {}
// Deep-copy the plugin, preserving its namespace.
nvinfer1::IPluginV2DynamicExt *CumMaxMinPluginDynamic::clone() const {
  CumMaxMinPluginDynamic *plugin =
      new CumMaxMinPluginDynamic(mLayerName, mDim, mCumType);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Both outputs (values and indices) have the input's shape.
nvinfer1::DimsExprs CumMaxMinPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  return inputs[0];
}

// Input must be linear FP32 or INT32; output[0] matches the input, and
// output[1] (indices) is always linear INT32.
bool CumMaxMinPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  switch (pos) {
    // input[0]
    case 0:
      return (inOut[pos].type == nvinfer1::DataType::kFLOAT ||
              inOut[pos].type == nvinfer1::DataType::kINT32) &&
             inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
    // output[0]
    case 1:
      return inOut[pos].type == inOut[0].type &&
             inOut[pos].format == inOut[0].format;
    // output[1]
    case 2:
      return inOut[pos].type == nvinfer1::DataType::kINT32 &&
             inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
    default:
      return false;
  }
}

// No configuration state needs to be captured before enqueue().
void CumMaxMinPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}
// Report the scratch-workspace requirement for enqueue(). The cummax/cummin
// launchers write directly into the two output tensors and use no scratch
// buffer, so no workspace is needed.
size_t CumMaxMinPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  // Fix: the original computed an element size into an unused local and
  // then fell off the end of a non-void function — undefined behavior and
  // a garbage workspace size. enqueue() uses no workspace, so return 0.
  return 0;
}
// Launch the cummax/cummin CUDA kernel for the bound input data type.
// outputs[0] receives the running values, outputs[1] the INT32 indices.
int CumMaxMinPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                    const nvinfer1::PluginTensorDesc *outputDesc,
                                    const void *const *inputs,
                                    void *const *outputs, void *workSpace,
                                    cudaStream_t stream) {
  const void *in_data = inputs[0];
  void *out_values = outputs[0];
  int *out_indices = (int *)outputs[1];

  // Shape of the sole input, passed to the launcher as a flat int array.
  const int *shape_data = &(inputDesc[0].dims.d[0]);
  const int rank = inputDesc[0].dims.nbDims;

  switch (inputDesc[0].type) {
    case nvinfer1::DataType::kFLOAT:
      CumMaxMinForwardLauncher_float((float *)in_data, (float *)out_values,
                                     out_indices, shape_data, rank, mDim,
                                     int(mCumType), stream);
      break;
    case nvinfer1::DataType::kINT32:
      CumMaxMinForwardLauncher_int32((int *)in_data, (int *)out_values,
                                     out_indices, shape_data, rank, mDim,
                                     int(mCumType), stream);
      break;
    default:
      // Other dtypes are rejected by supportsFormatCombination; do nothing.
      break;
  }

  return 0;
}
// output[0] (running values) mirrors the input type; output[1]
// (argmax/argmin indices) is always INT32.
nvinfer1::DataType CumMaxMinPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  switch (index) {
    case 0:
      return inputTypes[0];
    case 1:
      return nvinfer1::DataType::kINT32;
    default:
      break;
  }
  // Fix: the original fell off the end of this non-void function for any
  // index > 1 — undefined behavior. Default to the input's type.
  return inputTypes[0];
}
// IPluginV2 Methods

// The reported plugin type depends on the comparison mode, letting one
// class back both the "cummax" and "cummin" registrations.
const char *CumMaxMinPluginDynamic::getPluginType() const {
  switch (mCumType) {
    case TRT_CUMCMPTYPE::TRT_CUMMAX:
      return CUMMAX_PLUGIN_NAME;
    case TRT_CUMCMPTYPE::TRT_CUMMIN:
      return CUMMIN_PLUGIN_NAME;
    default:
      return "UnknownCumType";
  }
}

const char *CumMaxMinPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int CumMaxMinPluginDynamic::getNbOutputs() const { return 2; }

int CumMaxMinPluginDynamic::initialize() { return 0; }

void CumMaxMinPluginDynamic::terminate() {}

// Serialized state: scan dimension plus comparison mode.
size_t CumMaxMinPluginDynamic::getSerializationSize() const {
  return sizeof(mDim) + sizeof(mCumType);
}

// Write order must match the deserializing constructor.
void CumMaxMinPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mDim);
  serialize_value(&buffer, mCumType);
}

void CumMaxMinPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

void CumMaxMinPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *CumMaxMinPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
// Shared creator for the cummax/cummin plugins; cumType selects the variant.
// mPluginAttributes/mFC are static, so clear() guards against duplicate
// field registration when more than one creator instance is constructed
// (CumMaxPluginDynamicCreator and CumMinPluginDynamicCreator both derive
// from this class).
CumMaxMinPluginDynamicCreator::CumMaxMinPluginDynamicCreator(
    TRT_CUMCMPTYPE cumType)
    : mCumType(cumType) {
  mPluginAttributes.clear();
  // Single field: the dimension along which the cumulative op runs.
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dim"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *CumMaxMinPluginDynamicCreator::getPluginName() const {
  return CUMMAXMIN_PLUGIN_NAME;
}

const char *CumMaxMinPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

const nvinfer1::PluginFieldCollection *
CumMaxMinPluginDynamicCreator::getFieldNames() {
  return &mFC;
}
// Builds a CumMaxMinPluginDynamic from the field collection supplied by the
// network parser. Only the "dim" field is consumed; when absent (or its data
// pointer is null) the cumulative dimension defaults to 0.
nvinfer1::IPluginV2 *CumMaxMinPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int cumDim = 0;
  // Scan the collection for a populated "dim" entry.
  for (int fieldIdx = 0; fieldIdx < fc->nbFields; ++fieldIdx) {
    const nvinfer1::PluginField &field = fc->fields[fieldIdx];
    if (field.data == nullptr) continue;
    if (std::string(field.name) == "dim") {
      cumDim = static_cast<const int *>(field.data)[0];
    }
  }
  auto *plugin = new CumMaxMinPluginDynamic(name, cumDim, mCumType);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Reconstructs a plugin from an engine's serialized blob (inverse of
// CumMaxMinPluginDynamic::serialize).
nvinfer1::IPluginV2 *CumMaxMinPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // This object will be deleted when the network is destroyed, which will
  // call FCPluginDynamic::destroy()
  auto plugin = new CumMaxMinPluginDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

void CumMaxMinPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *CumMaxMinPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}

// Thin variant creators: fix the comparison type and report the
// variant-specific plugin name.
CumMaxPluginDynamicCreator::CumMaxPluginDynamicCreator()
    : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMAX) {}

const char *CumMaxPluginDynamicCreator::getPluginName() const {
  return CUMMAX_PLUGIN_NAME;
}

CumMinPluginDynamicCreator::CumMinPluginDynamicCreator()
    : CumMaxMinPluginDynamicCreator(TRT_CUMCMPTYPE::TRT_CUMMIN) {}

const char *CumMinPluginDynamicCreator::getPluginName() const {
  return CUMMIN_PLUGIN_NAME;
}
mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
using
mmcv
::
TensorDesc
;
// Cumulative max/min along one tensor dimension.
//
// Each CUDA thread owns one "scan line": a 1-D slice of the tensor along
// cum_dim. data_size is the number of such slices (total elements divided by
// the size of the cum dimension), and CUDA_1D_KERNEL_LOOP grid-strides over
// them. For each slice the thread walks the cum dimension sequentially,
// carrying the running extremum and the index where it was found, and writes
// both at every position.
//
// cum_type selects the comparison: 0 = running max, 1 = running min. Any
// other value copies the first element's value/index down the whole slice
// (neither case of the switch fires).
template <typename scalar_t>
__global__ void cummaxmin_kernel(const scalar_t *input, scalar_t *output_value,
                                 int *output_index, TensorDesc tensor_desc,
                                 int cum_dim, int cum_type) {
  const size_t cum_size = tensor_desc.shape[cum_dim];
  const size_t cum_stride = tensor_desc.stride[cum_dim];
  // number of independent scan lines = total elements / cum_size
  const size_t data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / cum_size;
  CUDA_1D_KERNEL_LOOP(index, data_size) {
    // Map the flat slice index to the offset of the slice's first element:
    // the quotient selects the block of slices "outside" cum_dim, the
    // remainder the position "inside" the stride.
    size_t cum_offset =
        index / cum_stride * (cum_size * cum_stride) + index % cum_stride;
    int cum_index = 0;
    auto cum_value = input[cum_offset];
    // Position 0 is always its own extremum.
    output_value[cum_offset] = cum_value;
    output_index[cum_offset] = cum_index;
    for (size_t cum_index_current = 1; cum_index_current < cum_size;
         ++cum_index_current) {
      cum_offset += cum_stride;
      const auto cum_value_current = input[cum_offset];
      switch (cum_type) {
        case 0:
          // max: strictly greater, so ties keep the earliest index
          if (cum_value_current > cum_value) {
            cum_value = cum_value_current;
            cum_index = cum_index_current;
          }
          break;
        case 1:
          // min: strictly less, so ties keep the earliest index
          if (cum_value_current < cum_value) {
            cum_value = cum_value_current;
            cum_index = cum_index_current;
          }
          break;
      }
      output_value[cum_offset] = cum_value;
      output_index[cum_offset] = cum_index;
    }
  }
}
// Host-side launcher: builds a TensorDesc (shapes + row-major strides) from
// the raw dims array, normalizes a negative cum_dim, and launches
// cummaxmin_kernel with one thread per scan line.
//
// Asynchronous with respect to the host: work is enqueued on `stream` and
// no synchronization or error check is performed here.
template <typename scalar_t>
void CumMaxMinForwardLauncher(const scalar_t *input, scalar_t *output_value,
                              int *output_index, const int *dims, int nbDims,
                              int cum_dim, int cum_type, cudaStream_t stream) {
  // fill tensordesc and initial
  TensorDesc tensor_desc;
  memset((void *)&tensor_desc, 0, sizeof(TensorDesc));
  tensor_desc.dim = nbDims;
  // Row-major (C-contiguous) strides, innermost dimension last.
  tensor_desc.shape[nbDims - 1] = dims[nbDims - 1];
  tensor_desc.stride[nbDims - 1] = 1;
  for (int i = nbDims - 2; i >= 0; --i) {
    tensor_desc.shape[i] = dims[i];
    tensor_desc.stride[i] = dims[i + 1] * tensor_desc.stride[i + 1];
  }

  // cum dim should be larger than 0 (wrap Python-style negative dims)
  cum_dim = cum_dim >= 0 ? cum_dim : (nbDims + cum_dim);

  // One kernel thread per slice along cum_dim.
  const int data_size =
      tensor_desc.stride[0] * tensor_desc.shape[0] / tensor_desc.shape[cum_dim];
  const int col_block = GET_BLOCKS(data_size, THREADS_PER_BLOCK);
  cummaxmin_kernel<scalar_t><<<col_block, THREADS_PER_BLOCK, 0, stream>>>(
      input, output_value, output_index, tensor_desc, cum_dim, cum_type);
}
// Non-template entry points with C-compatible signatures so the plugin
// translation unit (which does not see the template definition) can call
// the launcher for the two supported dtypes.
void CumMaxMinForwardLauncher_float(const float *input, float *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<float>(input, output_value, output_index, dims,
                                  nbDims, cum_dim, cum_type, stream);
}

void CumMaxMinForwardLauncher_int32(const int *input, int *output_value,
                                    int *output_index, const int *dims,
                                    int nbDims, int cum_dim, int cum_type,
                                    cudaStream_t stream) {
  CumMaxMinForwardLauncher<int>(input, output_value, output_index, dims,
                                nbDims, cum_dim, cum_type, stream);
}
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_deform_conv.hpp"
#include <assert.h>
#include <chrono>
#include "trt_serialize.hpp"
void
DeformConvForwardCUDAKernelLauncher_float
(
const
float
*
input
,
const
float
*
weight
,
const
float
*
offset
,
float
*
output
,
void
*
workspace
,
int
batchSize
,
int
nInputPlane
,
int
inputHeight
,
int
inputWidth
,
int
nOutputPlane
,
int
kW
,
int
kH
,
int
dW
,
int
dH
,
int
padW
,
int
padH
,
int
dilationW
,
int
dilationH
,
int
group
,
int
deformable_group
,
int
im2col_step
,
cublasHandle_t
cublas_handle
,
cudaStream_t
stream
);
namespace
{
static
const
char
*
PLUGIN_VERSION
{
"1"
};
static
const
char
*
PLUGIN_NAME
{
"MMCVDeformConv2d"
};
}
// namespace
nvinfer1
::
PluginFieldCollection
DeformableConvPluginDynamicCreator
::
mFC
{};
std
::
vector
<
nvinfer1
::
PluginField
>
DeformableConvPluginDynamicCreator
::
mPluginAttributes
;
// Construction-time configuration of the deformable-conv plugin: 2-D stride,
// padding and dilation, group counts, and the im2col batching step (how many
// images are unfolded per im2col pass; clamped to the batch size at enqueue).
DeformableConvPluginDynamic::DeformableConvPluginDynamic(
    const std::string &name, const nvinfer1::Dims &stride,
    const nvinfer1::Dims &padding, const nvinfer1::Dims &dilation,
    const int deformableGroup, const int group, int im2colStep)
    : mLayerName(name),
      mStride(stride),
      mPadding(padding),
      mDilation(dilation),
      mDeformableGroup(deformableGroup),
      mGroup(group),
      mIm2colStep(im2colStep) {}

// Deserializing constructor: field order must match serialize().
DeformableConvPluginDynamic::DeformableConvPluginDynamic(const std::string name,
                                                         const void *data,
                                                         size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mStride);
  deserialize_value(&data, &length, &mPadding);
  deserialize_value(&data, &length, &mDilation);
  deserialize_value(&data, &length, &mDeformableGroup);
  deserialize_value(&data, &length, &mGroup);
  deserialize_value(&data, &length, &mIm2colStep);
}

DeformableConvPluginDynamic::~DeformableConvPluginDynamic() {}
// Deep-copies the plugin configuration (TensorRT may clone per execution
// context).
nvinfer1::IPluginV2DynamicExt *DeformableConvPluginDynamic::clone() const {
  DeformableConvPluginDynamic *plugin =
      new DeformableConvPluginDynamic(mLayerName, mStride, mPadding, mDilation,
                                      mDeformableGroup, mGroup, mIm2colStep);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Output shape is NCHW assembled from the three inputs
// (0 = x, 1 = offset, 2 = weight, matching the pointers bound in enqueue):
//   N from x, C from the weight's output-channel dim, H/W from the offset
//   map, whose spatial dims already equal the convolution output size.
nvinfer1::DimsExprs DeformableConvPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = 4;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[2].d[0];
  ret.d[2] = inputs[1].d[2];
  ret.d[3] = inputs[1].d[3];
  return ret;
}
// Format negotiation: tensor 0 must be linear fp32; every other input and
// output must use the same type/format as tensor 0.
bool DeformableConvPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  const nvinfer1::PluginTensorDesc &desc = inOut[pos];
  if (pos == 0) {
    // Anchor tensor: only linear-layout float32 is supported.
    const bool isFloat = desc.type == nvinfer1::DataType::kFLOAT;
    const bool isLinear = desc.format == nvinfer1::TensorFormat::kLINEAR;
    return isFloat && isLinear;
  }
  // Remaining tensors simply mirror the anchor's type and format.
  return desc.type == inOut[0].type && desc.format == inOut[0].format;
}
// No shape-dependent state is cached; all sizes are recomputed from the
// tensor descriptors in getWorkspaceSize/enqueue, so configuration is a
// no-op.
void DeformableConvPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}
// Scratch-space budget for one enqueue:
//   col_size — the im2col column buffer (input_channels * kW * kH rows by
//              im2col_step * outH * outW columns);
//   out_size — a staging output buffer, needed only when im2col_step != 1
//              because the GEMM result must be permuted back to NCHW.
// Both are rounded up via mmcv::getAlignedSize.
size_t DeformableConvPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  int sizeof_dtype = mmcv::getElementSize(outputs[0].type);

  int batch_size = inputs[0].dims.d[0];
  int nInputPlane = inputs[0].dims.d[1];
  int inputHeight = inputs[0].dims.d[2];
  int inputWidth = inputs[0].dims.d[3];

  int nOutputPlane = outputs[0].dims.d[1];
  int outputHeight = outputs[0].dims.d[2];
  int outputWidth = outputs[0].dims.d[3];

  // NOTE(review): here kW reads weight dim 2 and kH dim 3, while enqueue
  // names the same dims kernelHeight/kernelWidth — the labels disagree, but
  // only the kW*kH product enters the size, so the result is unaffected.
  int kW = inputs[2].dims.d[2];
  int kH = inputs[2].dims.d[3];
  int im2col_step = std::min(batch_size, mIm2colStep);

  size_t col_size = mmcv::getAlignedSize(nInputPlane * kW * kH * im2col_step *
                                         outputHeight * outputWidth *
                                         sizeof_dtype);

  size_t out_size = 0;
  if (im2col_step != 1)
    out_size = mmcv::getAlignedSize(batch_size * nOutputPlane * outputHeight *
                                    outputWidth * sizeof_dtype);

  return col_size + out_size;
}
// Runs the deformable convolution forward pass on `stream`.
// Inputs: 0 = feature map x (NCHW), 1 = offset map, 2 = weight; one output.
// Only fp32 is handled; any other dtype returns 1 (failure) without
// touching the outputs.
int DeformableConvPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  int batch_size = inputDesc[0].dims.d[0];
  int inputChannel = inputDesc[0].dims.d[1];
  int inputHeight = inputDesc[0].dims.d[2];
  int inputWidth = inputDesc[0].dims.d[3];
  int outputChannel = outputDesc[0].dims.d[1];
  int kernelHeight = inputDesc[2].dims.d[2];
  int kernelWidth = inputDesc[2].dims.d[3];

  const void *x = inputs[0];
  const void *offset = inputs[1];
  const void *weight = inputs[2];
  void *output = outputs[0];
  // Never unfold more images per pass than the batch contains.
  int im2col_step = std::min(batch_size, mIm2colStep);

  // TODO: add fp16 support
  auto data_type = inputDesc[0].type;
  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      // Argument order follows the launcher's (kW, kH, dW, dH, padW, padH,
      // dilationW, dilationH) signature; stride/padding/dilation take d[0]
      // into the W slot and d[1] into the H slot.
      DeformConvForwardCUDAKernelLauncher_float(
          (float *)x, (float *)weight, (float *)offset, (float *)output,
          workSpace, batch_size, inputChannel, inputHeight, inputWidth,
          outputChannel, kernelWidth, kernelHeight, mStride.d[0], mStride.d[1],
          mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup,
          mDeformableGroup, im2col_step, m_cublas_handle, stream);
      break;
    default:
      return 1;
      break;
  }

  return 0;
}
// The single output inherits the input dtype.
nvinfer1::DataType DeformableConvPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *DeformableConvPluginDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

const char *DeformableConvPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int DeformableConvPluginDynamic::getNbOutputs() const { return 1; }

int DeformableConvPluginDynamic::initialize() { return 0; }

void DeformableConvPluginDynamic::terminate() {}

// Serialized state is exactly the members written by serialize(), in order.
size_t DeformableConvPluginDynamic::getSerializationSize() const {
  return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) +
         sizeof(mDeformableGroup) + sizeof(mGroup) + sizeof(mIm2colStep);
}

// Order must match the deserializing constructor.
void DeformableConvPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mStride);
  serialize_value(&buffer, mPadding);
  serialize_value(&buffer, mDilation);
  serialize_value(&buffer, mDeformableGroup);
  serialize_value(&buffer, mGroup);
  serialize_value(&buffer, mIm2colStep);
}

void DeformableConvPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Borrow TensorRT's cuBLAS handle for the GEMMs in enqueue; the handle is
// owned by the runtime, not by this plugin.
void DeformableConvPluginDynamic::attachToContext(
    cudnnContext *cudnnContext, cublasContext *cublasContext,
    nvinfer1::IGpuAllocator *gpuAllocator) {
  m_cublas_handle = cublasContext;
}

void DeformableConvPluginDynamic::detachFromContext() {}

void DeformableConvPluginDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *DeformableConvPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
////////////////////// creator /////////////////////////////

// Registers the plugin fields understood by createPlugin().
// Fix: mPluginAttributes/mFC are static class members, so this now calls
// clear() first — matching the CumMaxMin and GridSampler creators in this
// code base — to avoid appending duplicate field entries if more than one
// creator instance is ever constructed.
DeformableConvPluginDynamicCreator::DeformableConvPluginDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("stride"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("bias"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("im2col_step"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}
const char *DeformableConvPluginDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

const char *DeformableConvPluginDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

// Exposes the field list registered in the constructor.
const nvinfer1::PluginFieldCollection *
DeformableConvPluginDynamicCreator::getFieldNames() {
  return &mFC;
}
// Builds a DeformableConvPluginDynamic from parser-supplied fields.
// stride/padding/dilation accept either a single int (broadcast to both
// spatial dims) or a pair; group counts and im2col_step are single ints.
// Defaults: stride 1x1, padding 0x0, dilation 1x1, groups 1, im2col_step 32.
// NOTE(review): this parses a field named "group", but the constructor
// registers "groups" in mPluginAttributes — confirm which spelling the
// exporter actually emits; with "groups" the group count silently stays 1.
nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  nvinfer1::Dims stride{2, {1, 1}};
  nvinfer1::Dims padding{2, {0, 0}};
  nvinfer1::Dims dilation{2, {1, 1}};
  int deformableGroup = 1;
  int group = 1;
  int im2col_step = 32;

  for (int i = 0; i < fc->nbFields; i++) {
    if (fc->fields[i].data == nullptr) {
      continue;
    }
    std::string field_name(fc->fields[i].name);

    if (field_name.compare("stride") == 0) {
      stride.nbDims = 2;
      stride.d[0] = static_cast<const int *>(fc->fields[i].data)[0];
      if (fc->fields[i].length == 1) {
        // single value: broadcast to both spatial dims
        stride.d[1] = stride.d[0];
      } else {
        stride.d[1] = static_cast<const int *>(fc->fields[i].data)[1];
      }
    }

    if (field_name.compare("padding") == 0) {
      padding.nbDims = 2;
      padding.d[0] = static_cast<const int *>(fc->fields[i].data)[0];
      if (fc->fields[i].length == 1) {
        padding.d[1] = padding.d[0];
      } else {
        padding.d[1] = static_cast<const int *>(fc->fields[i].data)[1];
      }
    }

    if (field_name.compare("dilation") == 0) {
      dilation.nbDims = 2;
      dilation.d[0] = static_cast<const int *>(fc->fields[i].data)[0];
      if (fc->fields[i].length == 1) {
        dilation.d[1] = dilation.d[0];
      } else {
        dilation.d[1] = static_cast<const int *>(fc->fields[i].data)[1];
      }
    }

    if (field_name.compare("deform_groups") == 0) {
      deformableGroup = static_cast<const int *>(fc->fields[i].data)[0];
    }

    if (field_name.compare("group") == 0) {
      group = static_cast<const int *>(fc->fields[i].data)[0];
    }

    if (field_name.compare("im2col_step") == 0) {
      im2col_step = static_cast<const int *>(fc->fields[i].data)[0];
    }
  }

  DeformableConvPluginDynamic *plugin = new DeformableConvPluginDynamic(
      name, stride, padding, dilation, deformableGroup, group, im2col_step);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Reconstructs a plugin from an engine's serialized blob (inverse of
// DeformableConvPluginDynamic::serialize).
nvinfer1::IPluginV2 *DeformableConvPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  auto plugin = new DeformableConvPluginDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

void DeformableConvPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *DeformableConvPluginDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include <cuda_fp16.h>
#include "common_cuda_helper.hpp"
#include "deform_conv_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
// Unfolds `parallel_imgs` images into the im2col column buffer using the
// per-position sampling offsets (the deformable part). Output spatial size
// is derived with the standard convolution formula. The kernel launch is
// asynchronous on `stream`; cudaCheckError() only catches launch errors.
template <typename T>
void trt_deformable_im2col(const T *data_input, const T *data_offset,
                           const int channels, const int height,
                           const int width, const int ksize_h,
                           const int ksize_w, const int pad_h, const int pad_w,
                           const int stride_h, const int stride_w,
                           const int dilation_h, const int dilation_w,
                           const int parallel_imgs, const int deformable_group,
                           T *data_col, cudaStream_t stream) {
  // convolution output extents (dilated-kernel formula)
  int height_col =
      (height + 2 * pad_h - (dilation_h * (ksize_h - 1) + 1)) / stride_h + 1;
  int width_col =
      (width + 2 * pad_w - (dilation_w * (ksize_w - 1) + 1)) / stride_w + 1;
  // one kernel thread per (channel, output position, image)
  int num_kernels = channels * height_col * width_col * parallel_imgs;
  int channel_per_deformable_group = channels / deformable_group;

  deformable_im2col_gpu_kernel<T>
      <<<GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>(
          num_kernels, data_input, data_offset, height, width, ksize_h, ksize_w,
          pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
          channel_per_deformable_group, parallel_imgs, channels,
          deformable_group, height_col, width_col, data_col);

  cudaCheckError();
}
// Deformable convolution forward pass:
//   for each batch chunk of im2col_step images:
//     1. unfold input+offsets into the column buffer (trt_deformable_im2col);
//     2. per group, GEMM the weight slice against the column slice.
// When im2col_step > 1 the GEMM writes a staging buffer laid out as
// [batch/step, C_out, step, H, W], which is permuted back to NCHW at the end
// via memcpyPermute. The workspace must be at least as large as
// DeformableConvPluginDynamic::getWorkspaceSize reports.
// All work is enqueued on `stream`; cublas_handle is assumed to be bound to
// a compatible stream by the caller.
template <typename scalar_t>
void DeformConvForwardCUDAKernelLauncher(
    const scalar_t *input, const scalar_t *weight, const scalar_t *offset,
    scalar_t *output, void *workspace, int batchSize, int nInputPlane,
    int inputHeight, int inputWidth, int nOutputPlane, int kW, int kH, int dW,
    int dH, int padW, int padH, int dilationW, int dilationH, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream) {
  size_t word_size = sizeof(scalar_t);

  im2col_step = std::min(int(batchSize), im2col_step);

  // convolution output extents (dilated-kernel formula)
  long outputWidth =
      (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1;
  long outputHeight =
      (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1;

  long long columns_size = mmcv::getAlignedSize(nInputPlane * kW * kH *
                                                im2col_step * outputHeight *
                                                outputWidth * word_size);

  // column buffer for img2col (carved off the front of the workspace;
  // note: pointer arithmetic on void* is a compiler extension)
  scalar_t *columns = (scalar_t *)workspace;
  workspace = workspace + columns_size;

  scalar_t *output_buffer;
  long long output_buffer_size = 0;
  if (im2col_step == 1) {
    // GEMM can write the final output directly
    output_buffer = output;
  } else {
    // output need permute when im2col_step!=1
    output_buffer = (scalar_t *)workspace;
    output_buffer_size = batchSize * nOutputPlane * outputWidth * outputHeight;
  }

  // per-chunk advance through the flat input/offset/output arrays
  long long input_elt_step =
      im2col_step * nInputPlane * inputHeight * inputWidth;
  long long offset_elt_step =
      im2col_step * deformable_group * 2 * kH * kW * outputHeight * outputWidth;
  long long out_buffer_step =
      nOutputPlane * im2col_step * outputHeight * outputWidth;

  // per-group slices of the column buffer / weights / output
  long long col_g_step =
      nInputPlane * kW * kH / group * im2col_step * outputHeight * outputWidth;
  long long weight_g_step =
      nOutputPlane / group * nInputPlane / group * kH * kW;
  long long out_buffer_g_step =
      nOutputPlane / group * im2col_step * outputHeight * outputWidth;

  // GEMM dims: (m x k) weights times (k x n) columns -> (m x n) output
  int m = nOutputPlane / group;
  int n = im2col_step * outputHeight * outputWidth;
  int k = nInputPlane / group * kH * kW;
  scalar_t alpha = 1.;
  scalar_t beta = 0.;

  for (int elt = 0; elt < batchSize / im2col_step; elt++) {
    const scalar_t *input_start = input + elt * input_elt_step;
    const scalar_t *offset_start = offset + elt * offset_elt_step;

    trt_deformable_im2col<scalar_t>(input_start, offset_start, nInputPlane,
                                    inputHeight, inputWidth, kH, kW, padH, padW,
                                    dH, dW, dilationH, dilationW, im2col_step,
                                    deformable_group, columns, stream);

    for (int g = 0; g < group; ++g) {
      const scalar_t *weight_start = weight + g * weight_g_step;
      scalar_t *col_start = columns + g * col_g_step;
      scalar_t *out_buffer_start =
          output_buffer + elt * out_buffer_step + g * out_buffer_g_step;

      cublasGemmWrap<scalar_t>(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                               &alpha, col_start, n, weight_start, k, &beta,
                               out_buffer_start, n);
      cudaCheckError();
    }
  }

  if (im2col_step != 1) {
    // staging buffer is [batch/step, C_out, step, H, W]; swap axes 1 and 2
    // to recover contiguous NCHW in `output`
    int output_buffer_shape[5] = {batchSize / im2col_step, nOutputPlane,
                                  im2col_step, outputHeight, outputWidth};
    int output_buffer_permute[5] = {0, 2, 1, 3, 4};
    memcpyPermute<scalar_t>(output, output_buffer, &output_buffer_shape[0],
                            &output_buffer_permute[0], 5, stream);
  }
}
// Non-template fp32 entry point so the plugin translation unit (which does
// not see the template definition) can invoke the launcher.
void DeformConvForwardCUDAKernelLauncher_float(
    const float *input, const float *weight, const float *offset, float *output,
    void *workspace, int batchSize, int nInputPlane, int inputHeight,
    int inputWidth, int nOutputPlane, int kW, int kH, int dW, int dH, int padW,
    int padH, int dilationW, int dilationH, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) {
  DeformConvForwardCUDAKernelLauncher<float>(
      input, weight, offset, output, workspace, batchSize, nInputPlane,
      inputHeight, inputWidth, nOutputPlane, kW, kH, dW, dH, padW, padH,
      dilationW, dilationH, group, deformable_group, im2col_step, cublas_handle,
      stream);
}
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_grid_sampler.hpp"
#include <assert.h>
#include <stdio.h>
#include <chrono>
#include "trt_serialize.hpp"
using
mmcv
::
GridSamplerInterpolation
;
using
mmcv
::
GridSamplerPadding
;
void
grid_sample_float
(
float
*
output
,
const
float
*
input
,
const
float
*
grid
,
int
*
output_dims
,
int
*
input_dims
,
int
*
grid_dims
,
int
nb_dims
,
GridSamplerInterpolation
interp
,
GridSamplerPadding
padding
,
bool
align_corners
,
cudaStream_t
stream
);
namespace
{
static
const
char
*
PLUGIN_VERSION
{
"1"
};
static
const
char
*
PLUGIN_NAME
{
"grid_sampler"
};
}
// namespace
nvinfer1
::
PluginFieldCollection
GridSamplerDynamicCreator
::
mFC
{};
std
::
vector
<
nvinfer1
::
PluginField
>
GridSamplerDynamicCreator
::
mPluginAttributes
;
// Construction-time configuration: interpolation mode and padding mode are
// stored as raw ints and decoded to the mmcv enums at enqueue time
// (mode: 0 = bilinear, 1 = nearest; paddingMode: 0 = zeros, 1 = border,
// 2 = reflection).
GridSamplerDynamic::GridSamplerDynamic(const std::string &name, int mode,
                                       int paddingMode, bool alignCorners)
    : mLayerName(name),
      mMode(mode),
      mPaddingMode(paddingMode),
      mAlignCorners(alignCorners) {}

// Deserializing constructor: field order must match serialize().
GridSamplerDynamic::GridSamplerDynamic(const std::string name, const void *data,
                                       size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mMode);
  deserialize_value(&data, &length, &mPaddingMode);
  deserialize_value(&data, &length, &mAlignCorners);
}
// Deep-copies the plugin configuration (TensorRT may clone per execution
// context).
nvinfer1::IPluginV2DynamicExt *GridSamplerDynamic::clone() const {
  GridSamplerDynamic *plugin =
      new GridSamplerDynamic(mLayerName, mMode, mPaddingMode, mAlignCorners);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

// Output shape: batch and channels come from the input feature map
// (inputs[0]); each remaining spatial dim i comes from the grid's dim i-1
// (the grid carries its coordinate components in its last dim, shifting the
// spatial dims forward by one).
nvinfer1::DimsExprs GridSamplerDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = inputs[0].nbDims;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[0].d[1];
  for (int i = 2; i < ret.nbDims; ++i) {
    ret.d[i] = inputs[1].d[i - 1];
  }
  return ret;
}
// Format negotiation: tensor 0 must be linear fp32; every other input and
// output must match tensor 0's type and format.
bool GridSamplerDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  if (pos == 0) {
    return (inOut[pos].type == nvinfer1::DataType::kFLOAT &&
            inOut[pos].format == nvinfer1::TensorFormat::kLINEAR);
  } else {
    return inOut[pos].type == inOut[0].type &&
           inOut[pos].format == inOut[0].format;
  }
}

// No shape-dependent state is cached; everything is read from the tensor
// descriptors at enqueue time.
void GridSamplerDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
  // Validate input arguments
}
// Grid sampling needs no scratch memory: the kernel writes the output
// directly from the input and grid.
size_t GridSamplerDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  return 0;
}
// Runs grid sampling on `stream`.
// Inputs: 0 = feature map, 1 = sampling grid; one output. The stored int
// mode fields are decoded to the mmcv enums here; unknown values fall back
// to Bilinear / Zeros. Only fp32 is handled; any other dtype returns 1
// (failure) without touching the outputs.
int GridSamplerDynamic::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                const nvinfer1::PluginTensorDesc *outputDesc,
                                const void *const *inputs, void *const *outputs,
                                void *workSpace, cudaStream_t stream) {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  nvinfer1::Dims grid_dims = inputDesc[1].dims;
  nvinfer1::Dims output_dims = outputDesc[0].dims;

  using mmcv::GridSamplerInterpolation;
  using mmcv::GridSamplerPadding;

  // decode interpolation mode (0 = bilinear, 1 = nearest)
  GridSamplerInterpolation interp_mode = GridSamplerInterpolation::Bilinear;
  switch (mMode) {
    case 0:
      interp_mode = GridSamplerInterpolation::Bilinear;
      break;
    case 1:
      interp_mode = GridSamplerInterpolation::Nearest;
      break;
    default:
      break;
  }

  // decode padding mode (0 = zeros, 1 = border, 2 = reflection)
  GridSamplerPadding padding_mode = GridSamplerPadding::Zeros;
  switch (mPaddingMode) {
    case 0:
      padding_mode = GridSamplerPadding::Zeros;
      break;
    case 1:
      padding_mode = GridSamplerPadding::Border;
      break;
    case 2:
      padding_mode = GridSamplerPadding::Reflection;
      break;
    default:
      break;
  }

  auto data_type = inputDesc[0].type;
  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      // Dims::d arrays are passed as raw int* for the kernel wrapper.
      grid_sample_float(
          (float *)outputs[0], (float *)inputs[0], (float *)inputs[1],
          &(output_dims.d[0]), &(input_dims.d[0]), &(grid_dims.d[0]),
          input_dims.nbDims, interp_mode, padding_mode, mAlignCorners, stream);
      break;
    default:
      return 1;
      break;
  }

  return 0;
}
// The single output inherits the input dtype.
nvinfer1::DataType GridSamplerDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *GridSamplerDynamic::getPluginType() const { return PLUGIN_NAME; }

const char *GridSamplerDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int GridSamplerDynamic::getNbOutputs() const { return 1; }

int GridSamplerDynamic::initialize() { return 0; }

void GridSamplerDynamic::terminate() {}

// Serialized state is exactly the three members written by serialize().
size_t GridSamplerDynamic::getSerializationSize() const {
  return sizeof(mMode) + sizeof(mPaddingMode) + sizeof(mAlignCorners);
}

// Order must match the deserializing constructor.
void GridSamplerDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mMode);
  serialize_value(&buffer, mPaddingMode);
  serialize_value(&buffer, mAlignCorners);
}

void GridSamplerDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

void GridSamplerDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *GridSamplerDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
////////////////////// creator /////////////////////////////

// Registers the plugin fields understood by createPlugin(). The static
// attribute list is cleared first to avoid duplicate entries if more than
// one creator instance is ever constructed.
GridSamplerDynamicCreator::GridSamplerDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(nvinfer1::PluginField("interpolation_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding_mode"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("align_corners"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *GridSamplerDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

const char *GridSamplerDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

const nvinfer1::PluginFieldCollection *
GridSamplerDynamicCreator::getFieldNames() {
  return &mFC;
}
// Builds a GridSamplerDynamic from parser-supplied fields. All three fields
// are encoded as ints: interpolation_mode (default 0 = bilinear),
// padding_mode (default 0 = zeros), and align_corners (default false,
// nonzero means true). Fields with a null data pointer are skipped.
nvinfer1::IPluginV2 *GridSamplerDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  int interpMode = 0;
  int padMode = 0;
  bool wantAlignCorners = false;

  for (int fieldIdx = 0; fieldIdx < fc->nbFields; ++fieldIdx) {
    const nvinfer1::PluginField &field = fc->fields[fieldIdx];
    if (field.data == nullptr) continue;
    const int firstValue = static_cast<const int *>(field.data)[0];
    const std::string fieldName(field.name);
    if (fieldName == "interpolation_mode") {
      interpMode = firstValue;
    } else if (fieldName == "padding_mode") {
      padMode = firstValue;
    } else if (fieldName == "align_corners") {
      wantAlignCorners = (bool)firstValue;
    }
  }

  auto *plugin =
      new GridSamplerDynamic(name, interpMode, padMode, wantAlignCorners);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Reconstructs a plugin from an engine's serialized blob (inverse of
// GridSamplerDynamic::serialize).
nvinfer1::IPluginV2 *GridSamplerDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // This object will be deleted when the network is destroyed, which will
  // call FCPluginDynamic::destroy()
  auto plugin = new GridSamplerDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

void GridSamplerDynamicCreator::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *GridSamplerDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cuh
// and
// https://github.com/pytorch/pytorch/blob/ec683299ebabf297a3504c76248d37be830e4342/aten/src/ATen/native/cuda/GridSampler.cu
#include <cuda_fp16.h>
#include <stdio.h>
#include <algorithm>
#include <cmath>
#include <vector>
#include "common_cuda_helper.hpp"
#include "trt_cuda_helper.cuh"
#include "trt_grid_sampler.hpp"
#include "trt_plugin_helper.hpp"
using
mmcv
::
GridSamplerInterpolation
;
using
mmcv
::
GridSamplerPadding
;
using
mmcv
::
TensorDesc
;
// Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value,
// where we view each pixel as an area between (idx - 0.5) and (idx + 0.5).
// if align_corners: -1 and +1 get sent to the centers of the corner pixels
//     -1 --> 0
//     +1 --> (size - 1)
//     scale_factor = (size - 1) / 2
// if not align_corners: -1 and +1 get sent to the image edges
//     -1 --> -0.5
//     +1 --> (size - 1) + 0.5 == size - 0.5
//     scale_factor = size / 2
template <typename scalar_t>
static __forceinline__ __device__ scalar_t grid_sampler_unnormalize(
    scalar_t coord, int size, bool align_corners) {
  if (align_corners) {
    // map [-1, 1] onto the corner-pixel centers [0, size - 1]
    return ((coord + 1.f) / 2) * (size - 1);
  }
  // map [-1, 1] onto the image edges [-0.5, size - 0.5]
  return ((coord + 1.f) * size - 1) / 2;
}
// Clips coordinates to between 0 and clip_limit - 1 (border padding:
// out-of-range samples snap to the nearest edge pixel).
template <typename scalar_t>
static __forceinline__ __device__ scalar_t clip_coordinates(scalar_t in,
                                                            int clip_limit) {
  return ::min(static_cast<scalar_t>(clip_limit - 1),
               ::max(in, static_cast<scalar_t>(0)));
}
// Reflects coordinates until they fall between low and high (inclusive).
// The bounds are passed as twice their value so that half-integer values
// can be represented as ints.
template <typename scalar_t>
static __forceinline__ __device__ scalar_t reflect_coordinates(scalar_t in,
                                                               int twice_low,
                                                               int twice_high) {
  if (twice_low == twice_high) {
    // Degenerate interval of width zero: everything reflects onto 0.
    return static_cast<scalar_t>(0);
  }
  // Recover the real-valued lower bound and interval width.
  scalar_t min = static_cast<scalar_t>(twice_low) / 2;
  scalar_t span = static_cast<scalar_t>(twice_high - twice_low) / 2;
  // Shift so the interval starts at 0; reflection is symmetric about min.
  in = ::fabs(in - min);
  // `fmod` returns same sign as `in`, which is positive after the `fabs` above.
  scalar_t extra = ::fmod(in, span);
  // Number of full spans traversed decides reflection direction.
  int flips = static_cast<int>(::floor(in / span));
  if (flips % 2 == 0) {
    // Even number of flips: coordinate moves forward within the span.
    return extra + min;
  } else {
    // Odd number of flips: coordinate is mirrored within the span.
    return span - extra + min;
  }
}
// Maps values that cannot be safely converted to int (too large, too small,
// or non-finite) onto a sentinel that is guaranteed to be out of bounds, so
// later int casts are well-defined.
template <typename scalar_t>
static __forceinline__ __device__ scalar_t safe_downgrade_to_int_range(
    scalar_t x) {
  // -100.0 does not have special meaning. This is just to make sure
  // it's not within_bounds_2d or within_bounds_3d, and does not cause
  // undefined behavior. See #35506.
  if (x > INT_MAX - 1 || x < INT_MIN || !::isfinite(static_cast<double>(x)))
    return static_cast<scalar_t>(-100.0);
  return x;
}
// Computes the pixel source index value for a grid coordinate
// Pipeline: unnormalize [-1, 1] -> pixel space, apply the padding-mode
// coordinate transform, then sanitize values unsafe for int conversion.
template <typename scalar_t>
static __forceinline__ __device__ scalar_t grid_sampler_compute_source_index(
    scalar_t coord, int size, GridSamplerPadding padding_mode,
    bool align_corners) {
  coord = grid_sampler_unnormalize(coord, size, align_corners);
  if (padding_mode == GridSamplerPadding::Border) {
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    // reflect coordinates by image borders
    if (align_corners) {
      // Reflect about pixel centers: bounds [0, size - 1] (passed doubled).
      coord = reflect_coordinates(coord, 0, 2 * (size - 1));
    } else {
      // Reflect about image edges: bounds [-0.5, size - 0.5] (passed doubled).
      coord = reflect_coordinates(coord, -1, 2 * size - 1);
    }
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  }
  // GridSamplerPadding::Zeros falls through: out-of-range indices are
  // rejected later by the within_bounds_* checks.
  coord = safe_downgrade_to_int_range(coord);
  return coord;
}
// True iff (h, w) is a valid pixel index for an H x W image.
static __forceinline__ __device__ bool within_bounds_2d(int h, int w, int H,
                                                        int W) {
  const bool h_ok = (h >= 0) && (h < H);
  const bool w_ok = (w >= 0) && (w < W);
  return h_ok && w_ok;
}
// True iff (d, h, w) is a valid voxel index for a D x H x W volume.
static __forceinline__ __device__ bool within_bounds_3d(int d, int h, int w,
                                                        int D, int H, int W) {
  const bool d_ok = (d >= 0) && (d < D);
  const bool h_ok = (h >= 0) && (h < H);
  const bool w_ok = (w >= 0) && (w < W);
  return d_ok && h_ok && w_ok;
}
// 2-D grid sample forward kernel.
// One thread handles one output location (n, h, w) across all C channels.
// Shapes (from the TensorDescs): input is NCHW, grid is (N, out_H, out_W, 2)
// with the last axis holding (x, y), output is (N, C, out_H, out_W).
// nthreads must equal N * out_H * out_W (one index per spatial output).
template <typename scalar_t>
__global__ void grid_sampler_2d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  int C = input_desc.shape[1];
  int inp_H = input_desc.shape[2];
  int inp_W = input_desc.shape[3];
  int out_H = grid_desc.shape[1];
  int out_W = grid_desc.shape[2];
  // Per-axis strides for input / grid / output (element counts, not bytes).
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sH = input_desc.stride[2];
  int inp_sW = input_desc.stride[3];
  int grid_sN = grid_desc.stride[0];
  int grid_sH = grid_desc.stride[1];
  int grid_sW = grid_desc.stride[2];
  int grid_sCoor = grid_desc.stride[3];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sH = output_desc.stride[2];
  int out_sW = output_desc.stride[3];
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat index into (n, h, w).
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int n = index / (out_H * out_W);
    const int grid_offset = n * grid_sN + h * grid_sH + w * grid_sW;
    // get the corresponding input x, y coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];
    // Map normalized grid coords into input pixel space (with padding mode).
    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);
    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get NE, NW, SE, SW pixel values from (x, y)
      int ix_nw = static_cast<int>(::floor(ix));
      int iy_nw = static_cast<int>(::floor(iy));
      int ix_ne = ix_nw + 1;
      int iy_ne = iy_nw;
      int ix_sw = ix_nw;
      int iy_sw = iy_nw + 1;
      int ix_se = ix_nw + 1;
      int iy_se = iy_nw + 1;
      // get surfaces to each neighbor:
      // each corner's weight is the area of the rectangle opposite it.
      scalar_t nw = (ix_se - ix) * (iy_se - iy);
      scalar_t ne = (ix - ix_sw) * (iy_sw - iy);
      scalar_t sw = (ix_ne - ix) * (iy - iy_ne);
      scalar_t se = (ix - ix_nw) * (iy - iy_nw);
      // calculate bilinear weighted pixel value and set output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        // Out-of-bounds corners contribute zero (zeros padding semantics).
        *out_ptr_NCHW = static_cast<scalar_t>(0);
        if (within_bounds_2d(iy_nw, ix_nw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_nw * inp_sH + ix_nw * inp_sW] * nw;
        }
        if (within_bounds_2d(iy_ne, ix_ne, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_ne * inp_sH + ix_ne * inp_sW] * ne;
        }
        if (within_bounds_2d(iy_sw, ix_sw, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_sw * inp_sH + ix_sw * inp_sW] * sw;
        }
        if (within_bounds_2d(iy_se, ix_se, inp_H, inp_W)) {
          *out_ptr_NCHW += inp_ptr_NC[iy_se * inp_sH + ix_se * inp_sW] * se;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      int ix_nearest = static_cast<int>(::round(ix));
      int iy_nearest = static_cast<int>(::round(iy));
      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCHW = output + n * out_sN + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCHW += out_sC) {
        if (within_bounds_2d(iy_nearest, ix_nearest, inp_H, inp_W)) {
          *out_ptr_NCHW = inp_ptr_NC[iy_nearest * inp_sH + ix_nearest * inp_sW];
        } else {
          // Nearest sample fell outside the image: write zero.
          *out_ptr_NCHW = static_cast<scalar_t>(0);
        }
      }
    }
  }
}
// 3-D grid sample forward kernel (trilinear / nearest).
// One thread handles one output location (n, d, h, w) across all C channels.
// Shapes (from the TensorDescs): input is NCDHW, grid is
// (N, out_D, out_H, out_W, 3) with the last axis holding (x, y, z), output is
// (N, C, out_D, out_H, out_W). nthreads must equal N * out_D * out_H * out_W.
template <typename scalar_t>
__global__ void grid_sampler_3d_kernel(
    const int nthreads, const scalar_t *input, const scalar_t *grid,
    scalar_t *output, TensorDesc input_desc, TensorDesc grid_desc,
    TensorDesc output_desc, const GridSamplerInterpolation interpolation_mode,
    const GridSamplerPadding padding_mode, bool align_corners) {
  int C = input_desc.shape[1];
  int inp_D = input_desc.shape[2];
  int inp_H = input_desc.shape[3];
  int inp_W = input_desc.shape[4];
  int out_D = grid_desc.shape[1];
  int out_H = grid_desc.shape[2];
  int out_W = grid_desc.shape[3];
  // Per-axis strides for input / grid / output (element counts, not bytes).
  int inp_sN = input_desc.stride[0];
  int inp_sC = input_desc.stride[1];
  int inp_sD = input_desc.stride[2];
  int inp_sH = input_desc.stride[3];
  int inp_sW = input_desc.stride[4];
  int grid_sN = grid_desc.stride[0];
  int grid_sD = grid_desc.stride[1];
  int grid_sH = grid_desc.stride[2];
  int grid_sW = grid_desc.stride[3];
  int grid_sCoor = grid_desc.stride[4];
  int out_sN = output_desc.stride[0];
  int out_sC = output_desc.stride[1];
  int out_sD = output_desc.stride[2];
  int out_sH = output_desc.stride[3];
  int out_sW = output_desc.stride[4];
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat index into (n, d, h, w).
    const int w = index % out_W;
    const int h = (index / out_W) % out_H;
    const int d = (index / (out_H * out_W)) % out_D;
    const int n = index / (out_D * out_H * out_W);
    const int grid_offset =
        n * grid_sN + d * grid_sD + h * grid_sH + w * grid_sW;
    // get the corresponding input x, y, z coordinates from grid
    scalar_t ix = grid[grid_offset];
    scalar_t iy = grid[grid_offset + grid_sCoor];
    scalar_t iz = grid[grid_offset + 2 * grid_sCoor];
    // Map normalized grid coords into input voxel space (with padding mode).
    ix = grid_sampler_compute_source_index(ix, inp_W, padding_mode,
                                           align_corners);
    iy = grid_sampler_compute_source_index(iy, inp_H, padding_mode,
                                           align_corners);
    iz = grid_sampler_compute_source_index(iz, inp_D, padding_mode,
                                           align_corners);
    if (interpolation_mode == GridSamplerInterpolation::Bilinear) {
      // get corner pixel values from (x, y, z)
      // for 4d, we used north-east-south-west
      // for 5d, we add top-bottom
      int ix_tnw = static_cast<int>(::floor(ix));
      int iy_tnw = static_cast<int>(::floor(iy));
      int iz_tnw = static_cast<int>(::floor(iz));
      int ix_tne = ix_tnw + 1;
      int iy_tne = iy_tnw;
      int iz_tne = iz_tnw;
      int ix_tsw = ix_tnw;
      int iy_tsw = iy_tnw + 1;
      int iz_tsw = iz_tnw;
      int ix_tse = ix_tnw + 1;
      int iy_tse = iy_tnw + 1;
      int iz_tse = iz_tnw;
      int ix_bnw = ix_tnw;
      int iy_bnw = iy_tnw;
      int iz_bnw = iz_tnw + 1;
      int ix_bne = ix_tnw + 1;
      int iy_bne = iy_tnw;
      int iz_bne = iz_tnw + 1;
      int ix_bsw = ix_tnw;
      int iy_bsw = iy_tnw + 1;
      int iz_bsw = iz_tnw + 1;
      int ix_bse = ix_tnw + 1;
      int iy_bse = iy_tnw + 1;
      int iz_bse = iz_tnw + 1;
      // get surfaces to each neighbor:
      // each corner's weight is the volume of the box opposite it.
      scalar_t tnw = (ix_bse - ix) * (iy_bse - iy) * (iz_bse - iz);
      scalar_t tne = (ix - ix_bsw) * (iy_bsw - iy) * (iz_bsw - iz);
      scalar_t tsw = (ix_bne - ix) * (iy - iy_bne) * (iz_bne - iz);
      scalar_t tse = (ix - ix_bnw) * (iy - iy_bnw) * (iz_bnw - iz);
      scalar_t bnw = (ix_tse - ix) * (iy_tse - iy) * (iz - iz_tse);
      scalar_t bne = (ix - ix_tsw) * (iy_tsw - iy) * (iz - iz_tsw);
      scalar_t bsw = (ix_tne - ix) * (iy - iy_tne) * (iz - iz_tne);
      scalar_t bse = (ix - ix_tnw) * (iy - iy_tnw) * (iz - iz_tnw);
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        // (c, iz_tnw, iy_tnw, ix_tnw) * tnw + (c, iz_tne, iy_tne, ix_tne) *
        // tne
        // + (c, iz_tsw, iy_tsw, ix_tsw) * tsw + (c, iz_tse, iy_tse, ix_tse) *
        // tse
        // + (c, iz_bnw, iy_bnw, ix_bnw) * bnw + (c, iz_bne, iy_bne, ix_bne) *
        // bne
        // + (c, iz_bsw, iy_bsw, ix_bsw) * bsw + (c, iz_bse, iy_bse, ix_bse) *
        // bse
        // Out-of-bounds corners contribute zero (zeros padding semantics).
        *out_ptr_NCDHW = static_cast<scalar_t>(0);
        if (within_bounds_3d(iz_tnw, iy_tnw, ix_tnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tnw * inp_sD + iy_tnw * inp_sH + ix_tnw * inp_sW] *
              tnw;
        }
        if (within_bounds_3d(iz_tne, iy_tne, ix_tne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tne * inp_sD + iy_tne * inp_sH + ix_tne * inp_sW] *
              tne;
        }
        if (within_bounds_3d(iz_tsw, iy_tsw, ix_tsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tsw * inp_sD + iy_tsw * inp_sH + ix_tsw * inp_sW] *
              tsw;
        }
        if (within_bounds_3d(iz_tse, iy_tse, ix_tse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_tse * inp_sD + iy_tse * inp_sH + ix_tse * inp_sW] *
              tse;
        }
        if (within_bounds_3d(iz_bnw, iy_bnw, ix_bnw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bnw * inp_sD + iy_bnw * inp_sH + ix_bnw * inp_sW] *
              bnw;
        }
        if (within_bounds_3d(iz_bne, iy_bne, ix_bne, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bne * inp_sD + iy_bne * inp_sH + ix_bne * inp_sW] *
              bne;
        }
        if (within_bounds_3d(iz_bsw, iy_bsw, ix_bsw, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bsw * inp_sD + iy_bsw * inp_sH + ix_bsw * inp_sW] *
              bsw;
        }
        if (within_bounds_3d(iz_bse, iy_bse, ix_bse, inp_D, inp_H, inp_W)) {
          *out_ptr_NCDHW +=
              inp_ptr_NC[iz_bse * inp_sD + iy_bse * inp_sH + ix_bse * inp_sW] *
              bse;
        }
      }
    } else if (interpolation_mode == GridSamplerInterpolation::Nearest) {
      int ix_nearest = static_cast<int>(::round(ix));
      int iy_nearest = static_cast<int>(::round(iy));
      int iz_nearest = static_cast<int>(::round(iz));
      // assign nearest neighbor pixel value to output pixel
      auto inp_ptr_NC = input + n * inp_sN;
      auto out_ptr_NCDHW =
          output + n * out_sN + d * out_sD + h * out_sH + w * out_sW;
      for (int c = 0; c < C;
           ++c, inp_ptr_NC += inp_sC, out_ptr_NCDHW += out_sC) {
        if (within_bounds_3d(iz_nearest, iy_nearest, ix_nearest, inp_D, inp_H,
                             inp_W)) {
          *out_ptr_NCDHW = inp_ptr_NC[iz_nearest * inp_sD +
                                      iy_nearest * inp_sH + ix_nearest * inp_sW];
        } else {
          // Nearest sample fell outside the volume: write zero.
          *out_ptr_NCDHW = static_cast<scalar_t>(0);
        }
      }
    }
  }
}
// Fills `desc` with the given shape and the matching C-contiguous
// (row-major) strides: the innermost axis has stride 1, and each outer
// axis' stride is the product of all inner extents.
void create_desc(const int *dims, int nb_dims, TensorDesc &desc) {
  for (int i = 0; i < nb_dims; ++i) {
    desc.shape[i] = dims[i];
  }
  int running = 1;
  for (int i = nb_dims - 1; i >= 0; --i) {
    desc.stride[i] = running;
    running *= desc.shape[i];
  }
}
// Host-side launcher: builds dense TensorDescs for input/grid/output, counts
// the spatial output elements (all dims except the channel dim, index 1),
// and dispatches the 2-D or 3-D kernel on `stream` based on nb_dims.
// nb_dims == 4 -> NCHW sampling; nb_dims == 5 -> NCDHW sampling.
template <typename T>
void grid_sample(T *output, const T *input, const T *grid, int *output_dims,
                 int *input_dims, int *grid_dims, int nb_dims,
                 GridSamplerInterpolation interp, GridSamplerPadding padding,
                 bool align_corners, cudaStream_t stream) {
  TensorDesc input_desc;
  create_desc(input_dims, nb_dims, input_desc);
  TensorDesc output_desc;
  create_desc(output_dims, nb_dims, output_desc);
  TensorDesc grid_desc;
  create_desc(grid_dims, nb_dims, grid_desc);
  // One kernel thread per (n, spatial...) output location; the channel
  // dimension (i == 1) is iterated inside the kernel, so skip it here.
  int count = 1;
  for (int i = 0; i < nb_dims; ++i) {
    if (i == 1) {
      continue;
    }
    count *= output_desc.shape[i];
  }
  if (nb_dims == 4) {
    grid_sampler_2d_kernel<T>
        <<<GET_BLOCKS(count), THREADS_PER_BLOCK, 0, stream>>>(
            count, input, grid, output, input_desc, grid_desc, output_desc,
            interp, padding, align_corners);
  } else if (nb_dims == 5) {
    grid_sampler_3d_kernel<T>
        <<<GET_BLOCKS(count), THREADS_PER_BLOCK, 0, stream>>>(
            count, input, grid, output, input_desc, grid_desc, output_desc,
            interp, padding, align_corners);
  } else {
    // NOTE(review): unsupported ranks only print a message; no error status
    // is returned to the caller and no cudaGetLastError() check follows the
    // launches above — consider surfacing failures to the caller.
    printf("input and grid dims should be 4 or 5\n");
  }
}
// C-linkage-friendly float32 entry point: forwards directly to the
// grid_sample<T> template with T = float. See grid_sample for semantics.
void grid_sample_float(float *output, const float *input, const float *grid,
                       int *output_dims, int *input_dims, int *grid_dims,
                       int nb_dims, GridSamplerInterpolation interp,
                       GridSamplerPadding padding, bool align_corners,
                       cudaStream_t stream) {
  grid_sample<float>(output, input, grid, output_dims, input_dims, grid_dims,
                     nb_dims, interp, padding, align_corners, stream);
}
mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
// Modified from:
// https://github.com/NVIDIA/TensorRT/blob/master/plugin/instanceNormalizationPlugin/instanceNormalizationPlugin.cpp
#include "trt_instance_norm.hpp"
#include <cuda_fp16.h>
#include <stdexcept>
#include "trt_serialize.hpp"
using
namespace
nvinfer1
;
// Maps a TensorRT tensor data type onto the corresponding cuDNN data type.
// Only kFLOAT and kHALF are supported; anything else yields
// CUDNN_STATUS_BAD_PARAM and leaves *cudnn_dtype untouched.
cudnnStatus_t convert_trt2cudnn_dtype(nvinfer1::DataType trt_dtype,
                                      cudnnDataType_t *cudnn_dtype) {
  if (trt_dtype == nvinfer1::DataType::kFLOAT) {
    *cudnn_dtype = CUDNN_DATA_FLOAT;
  } else if (trt_dtype == nvinfer1::DataType::kHALF) {
    *cudnn_dtype = CUDNN_DATA_HALF;
  } else {
    return CUDNN_STATUS_BAD_PARAM;
  }
  return CUDNN_STATUS_SUCCESS;
}
namespace {
// Plugin identity used for registration and (de)serialization matching.
constexpr const char *PLUGIN_VERSION{"1"};
constexpr const char *PLUGIN_NAME{"MMCVInstanceNormalization"};
}  // namespace

// Static creator state: the field collection advertised to TensorRT and the
// backing attribute storage it points into.
PluginFieldCollection InstanceNormalizationDynamicCreator::mFC{};
std::vector<PluginField> InstanceNormalizationDynamicCreator::mPluginAttributes;

// Construct from explicit parameters (build-time path).
InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string &name, float epsilon)
    : mLayerName(name), mEpsilon(epsilon) {}

// Construct from a serialized byte stream (deserialization path); only
// mEpsilon is stored in the stream.
InstanceNormalizationDynamic::InstanceNormalizationDynamic(
    const std::string &name, void const *serialData, size_t serialLength)
    : mLayerName(name) {
  deserialize_value(&serialData, &serialLength, &mEpsilon);
}

InstanceNormalizationDynamic::~InstanceNormalizationDynamic() {}
// InstanceNormalizationDynamic returns one output.
int InstanceNormalizationDynamic::getNbOutputs() const { return 1; }

// The output tensor has exactly the shape of input 0 (element-wise op).
DimsExprs InstanceNormalizationDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs output(inputs[0]);
  return output;
}

int InstanceNormalizationDynamic::initialize() { return 0; }

void InstanceNormalizationDynamic::terminate() {}

// Workspace holds two aligned buffers (scale and bias, each tiled to N*C
// elements of the scale tensor's element type) used by enqueue.
size_t InstanceNormalizationDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  int n = inputs[0].dims.d[0];
  int c = inputs[0].dims.d[1];
  int elem_size = mmcv::getElementSize(inputs[1].type);
  return mmcv::getAlignedSize(n * c * elem_size) * 2;
}
// Runs instance normalization by reshaping the NC(H)W input to
// (1, N*C, H, W) and calling cuDNN batch norm in training mode, with the
// per-channel scale/bias (inputs[1] / inputs[2]) tiled N times so each
// (n, c) slice is normalized independently.
// Returns 0 (success is assumed; see review notes below).
int InstanceNormalizationDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workspace, cudaStream_t stream) {
  nvinfer1::Dims input_dims = inputDesc[0].dims;
  int n = input_dims.d[0];
  int c = input_dims.d[1];
  int h = input_dims.d[2];
  // A 3-D (N, C, H) input is treated as (N, C, H, 1).
  int w = input_dims.nbDims > 3 ? input_dims.d[3] : 1;
  int elem_size = mmcv::getElementSize(inputDesc[1].type);

  // FIX: the original did pointer arithmetic directly on `void*`
  // (a GNU extension, ill-formed ISO C++). Use char* for byte offsets.
  char *const scale_ws = static_cast<char *>(workspace);
  char *const bias_ws = scale_ws + mmcv::getAlignedSize(n * c * elem_size);
  void *n_scales = scale_ws;
  void *n_bias = bias_ws;
  const void *scales = inputs[1];
  const void *bias = inputs[2];
  // Tile the C-element scale/bias N times so cuDNN sees N*C "channels".
  // NOTE(review): cudaMemcpyAsync return codes are not checked here.
  for (int i = 0; i < n; ++i) {
    cudaMemcpyAsync(scale_ws + static_cast<size_t>(i) * c * elem_size, scales,
                    c * elem_size, cudaMemcpyDeviceToDevice, stream);
    cudaMemcpyAsync(bias_ws + static_cast<size_t>(i) * c * elem_size, bias,
                    c * elem_size, cudaMemcpyDeviceToDevice, stream);
  }

  // Scale/bias descriptor is (1, N*C, 1, 1) and always fp32.
  cudnnSetTensor4dDescriptor(_b_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1,
                             n * c, 1, 1);
  cudnnDataType_t cudnn_dtype{};
  convert_trt2cudnn_dtype(inputDesc[0].type, &cudnn_dtype);
  // x/y descriptors view the batch as a single sample with N*C channels.
  cudnnSetTensor4dDescriptor(_x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c,
                             h, w);
  cudnnSetTensor4dDescriptor(_y_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, n * c,
                             h, w);
  float alpha = 1;
  float beta = 0;
  void const *x_ptr = inputs[0];
  void *y_ptr = outputs[0];
  cudnnSetStream(_cudnn_handle, stream);
  // Note: Use of CUDNN_BATCHNORM_SPATIAL_PERSISTENT can cause numerical
  // overflows (NaNs) for fp32 data in some circumstances. The lower-
  // performance CUDNN_BATCHNORM_SPATIAL should be used if this is not
  // acceptable.
  cudnnBatchNormalizationForwardTraining(
      _cudnn_handle, CUDNN_BATCHNORM_SPATIAL_PERSISTENT, &alpha, &beta,
      _x_desc, x_ptr, _y_desc, y_ptr, _b_desc, n_scales, n_bias, 1., nullptr,
      nullptr, mEpsilon, nullptr, nullptr);
  return 0;
}
// Serialized payload is just mEpsilon.
size_t InstanceNormalizationDynamic::getSerializationSize() const {
  return serialized_size(mEpsilon);
}

void InstanceNormalizationDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mEpsilon);
}

// Accept fp32/fp16 linear-format tensors; all I/O tensors must share the
// data type of tensor 0.
bool InstanceNormalizationDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  return ((inOut[pos].type == nvinfer1::DataType::kFLOAT ||
           inOut[pos].type == nvinfer1::DataType::kHALF) &&
          inOut[pos].format == nvinfer1::PluginFormat::kLINEAR &&
          inOut[pos].type == inOut[0].type);
}

const char *InstanceNormalizationDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

const char *InstanceNormalizationDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

void InstanceNormalizationDynamic::destroy() { delete this; }

// Deep-copies this plugin (name + epsilon) and propagates the namespace.
IPluginV2DynamicExt *InstanceNormalizationDynamic::clone() const {
  auto *plugin = new InstanceNormalizationDynamic{mLayerName, mEpsilon};
  plugin->setPluginNamespace(mPluginNamespace.c_str());
  return plugin;
}

// Set plugin namespace
void InstanceNormalizationDynamic::setPluginNamespace(
    const char *pluginNamespace) {
  mPluginNamespace = pluginNamespace;
}

const char *InstanceNormalizationDynamic::getPluginNamespace() const {
  return mPluginNamespace.c_str();
}
// Output dtype mirrors the dtype of input 0.
nvinfer1::DataType InstanceNormalizationDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// Attach the plugin object to an execution context and grant the plugin the
// access to some context resource.
// Stores the context's cuDNN handle and creates the three tensor
// descriptors used by enqueue (scale/bias, input, output).
void InstanceNormalizationDynamic::attachToContext(
    cudnnContext *cudnnContext, cublasContext *cublasContext,
    IGpuAllocator *gpuAllocator) {
  _cudnn_handle = cudnnContext;
  cudnnCreateTensorDescriptor(&_b_desc);
  cudnnCreateTensorDescriptor(&_x_desc);
  cudnnCreateTensorDescriptor(&_y_desc);
}

// Detach the plugin object from its execution context.
// Destroys the descriptors created in attachToContext (reverse order).
void InstanceNormalizationDynamic::detachFromContext() {
  cudnnDestroyTensorDescriptor(_y_desc);
  cudnnDestroyTensorDescriptor(_x_desc);
  cudnnDestroyTensorDescriptor(_b_desc);
}

// Nothing to pre-compute for this plugin.
void InstanceNormalizationDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *out, int nbOutputs) {}
// InstanceNormalizationDynamicCreator methods
// Registers the single "epsilon" attribute TensorRT may pass to createPlugin.
InstanceNormalizationDynamicCreator::InstanceNormalizationDynamicCreator() {
  mPluginAttributes.clear();
  mPluginAttributes.emplace_back(
      PluginField("epsilon", nullptr, PluginFieldType::kFLOAT32, 1));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *InstanceNormalizationDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}

const char *InstanceNormalizationDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}

const PluginFieldCollection *
InstanceNormalizationDynamicCreator::getFieldNames() {
  return &mFC;
}

// Builds a plugin from parser-supplied fields; unknown fields are ignored
// and epsilon defaults to 1e-5 when absent.
// NOTE(review): fc is dereferenced without a null check — presumably
// TensorRT always passes a valid collection; confirm against callers.
IPluginV2DynamicExt *InstanceNormalizationDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  float epsilon = 1e-5;
  const PluginField *fields = fc->fields;
  for (int i = 0; i < fc->nbFields; ++i) {
    const char *attrName = fields[i].name;
    if (!strcmp(attrName, "epsilon")) {
      epsilon = *(static_cast<const float *>(fields[i].data));
    }
  }
  InstanceNormalizationDynamic *obj =
      new InstanceNormalizationDynamic(name, epsilon);
  obj->setPluginNamespace(mNamespace.c_str());
  return obj;
}

// Rebuilds a plugin from the byte stream written by serialize().
IPluginV2DynamicExt *InstanceNormalizationDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  InstanceNormalizationDynamic *obj =
      new InstanceNormalizationDynamic{name, serialData, serialLength};
  obj->setPluginNamespace(mNamespace.c_str());
  return obj;
}

void InstanceNormalizationDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *InstanceNormalizationDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_modulated_deform_conv.hpp"
#include <assert.h>
#include <chrono>
#include "trt_serialize.hpp"
// Forward declaration of the fp32 CUDA launcher implemented in
// trt_modulated_deform_conv_kernel.cu.
void ModulatedDeformConvForwardCUDAKernelLauncher_float(
    const float *input, const float *weight, const float *bias,
    const float *offset, const float *mask, float *output, void *workspace,
    int batch, int channels, int height, int width, int channels_out,
    int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w,
    int pad_h, int dilation_w, int dilation_h, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream);

namespace {
// Plugin identity used for registration and (de)serialization matching.
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"MMCVModulatedDeformConv2d"};
}  // namespace

// Static creator state: field collection advertised to TensorRT and its
// backing attribute storage.
nvinfer1::PluginFieldCollection
    ModulatedDeformableConvPluginDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    ModulatedDeformableConvPluginDynamicCreator::mPluginAttributes;
// Construct from explicit hyper-parameters (build-time path). Bias presence
// is unknown until configurePlugin sees the input count, so it starts false.
ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(
    const std::string &name, const nvinfer1::Dims stride,
    const nvinfer1::Dims padding, const nvinfer1::Dims dilation,
    const int deformableGroup, const int group)
    : mLayerName(name),
      mStride(stride),
      mPadding(padding),
      mDilation(dilation),
      mDeformableGroup(deformableGroup),
      mGroup(group) {
  mWithBias = false;
}

// Construct from a serialized byte stream; field order must match
// serialize(). mWithBias is re-derived later by configurePlugin.
ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(
    const std::string name, const void *data, size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mStride);
  deserialize_value(&data, &length, &mPadding);
  deserialize_value(&data, &length, &mDilation);
  deserialize_value(&data, &length, &mDeformableGroup);
  deserialize_value(&data, &length, &mGroup);
  mWithBias = false;
}

ModulatedDeformableConvPluginDynamic::~ModulatedDeformableConvPluginDynamic() {}
// Deep-copies the plugin with all hyper-parameters and propagates the
// namespace. mWithBias is intentionally not copied; configurePlugin will
// set it again on the clone.
nvinfer1::IPluginV2DynamicExt *ModulatedDeformableConvPluginDynamic::clone()
    const {
  ModulatedDeformableConvPluginDynamic *plugin =
      new ModulatedDeformableConvPluginDynamic(
          mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Output shape is (N, out_channels, out_H, out_W):
//   N            from input 0 (the feature map),
//   out_channels from input 3 dim 0 (the weight's leading axis),
//   out_H/out_W  from input 1 dims 2/3 (the offset tensor already has the
//                convolution's spatial output size).
nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = 4;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[3].d[0];
  ret.d[2] = inputs[1].d[2];
  ret.d[3] = inputs[1].d[3];
  return ret;
}
// Tensor 0 must be fp32 linear; every other tensor must match tensor 0's
// type and format.
bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  if (pos == 0) {
    return (inOut[pos].type == nvinfer1::DataType::kFLOAT &&
            inOut[pos].format == nvinfer1::TensorFormat::kLINEAR);
  } else {
    return inOut[pos].type == inOut[0].type &&
           inOut[pos].format == inOut[0].format;
  }
}

// Inputs are (x, offset, mask, weight[, bias]); a fifth input means the
// optional bias is present.
void ModulatedDeformableConvPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {
  if (nbInputs == 5) {
    mWithBias = true;
  }
}
// Workspace = one aligned im2col column buffer of
// in_channels * kernel_area * out_H * out_W elements.
size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  int sizeof_dtype = mmcv::getElementSize(outputs[0].type);
  int batch_size = inputs[0].dims.d[0];
  int nInputPlane = inputs[0].dims.d[1];
  int inputHeight = inputs[0].dims.d[2];
  int inputWidth = inputs[0].dims.d[3];
  int nOutputPlane = outputs[0].dims.d[1];
  int outputHeight = outputs[0].dims.d[2];
  int outputWidth = outputs[0].dims.d[3];
  // NOTE(review): enqueue reads weight dim 2 as kernel_h and dim 3 as
  // kernel_w; here the names are swapped (kW = d[2], kH = d[3]). Harmless
  // because only the product kW * kH is used, but the naming is misleading.
  int kW = inputs[3].dims.d[2];
  int kH = inputs[3].dims.d[3];
  int im2col_step = std::min(32, batch_size);
  size_t col_size = mmcv::getAlignedSize(nInputPlane * kW * kH * outputHeight *
                                         outputWidth * sizeof_dtype);
  return col_size;
}
// Dispatches the modulated deformable conv forward pass.
// Inputs: 0 = x (NCHW), 1 = offset, 2 = mask, 3 = weight, 4 = optional bias.
// Returns 0 on success, 1 for unsupported data types.
int ModulatedDeformableConvPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  int batch = inputDesc[0].dims.d[0];
  int channels = inputDesc[0].dims.d[1];
  int height = inputDesc[0].dims.d[2];
  int width = inputDesc[0].dims.d[3];
  int channels_out = outputDesc[0].dims.d[1];
  // Kernel size comes from the weight tensor's trailing spatial dims.
  int kernel_h = inputDesc[3].dims.d[2];
  int kernel_w = inputDesc[3].dims.d[3];
  const void *x = inputs[0];
  const void *offset = inputs[1];
  const void *mask = inputs[2];
  const void *weight = inputs[3];
  const void *bias = mWithBias ? inputs[4] : nullptr;
  void *output = outputs[0];
  // Process at most 32 samples per im2col pass.
  int im2col_step = std::min(batch, 32);
  // TODO: add fp16 support
  auto data_type = inputDesc[0].type;
  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      // NOTE(review): the launcher parameter order is (stride_w, stride_h,
      // pad_w, pad_h, ...) while mStride.d[0]/mPadding.d[0]/mDilation.d[0]
      // are passed first — confirm d[0] is indeed the width component.
      ModulatedDeformConvForwardCUDAKernelLauncher_float(
          (float *)x, (float *)weight, (float *)bias, (float *)offset,
          (float *)mask, (float *)output, workSpace, batch, channels, height,
          width, channels_out, kernel_w, kernel_h, mStride.d[0], mStride.d[1],
          mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1],
          mGroup, mDeformableGroup, im2col_step, m_cublas_handle, stream);
      break;
    default:
      return 1;
      break;
  }
  return 0;
}
// Output dtype mirrors the dtype of input 0.
nvinfer1::DataType ModulatedDeformableConvPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *ModulatedDeformableConvPluginDynamic::getPluginType() const {
  return PLUGIN_NAME;
}

const char *ModulatedDeformableConvPluginDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}

int ModulatedDeformableConvPluginDynamic::getNbOutputs() const { return 1; }

int ModulatedDeformableConvPluginDynamic::initialize() { return 0; }

void ModulatedDeformableConvPluginDynamic::terminate() {}

// Serialized payload: stride, padding, dilation, deformable group count,
// group count — written/read in this exact order.
size_t ModulatedDeformableConvPluginDynamic::getSerializationSize() const {
  return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) +
         sizeof(mDeformableGroup) + sizeof(mGroup);
}

void ModulatedDeformableConvPluginDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mStride);
  serialize_value(&buffer, mPadding);
  serialize_value(&buffer, mDilation);
  serialize_value(&buffer, mDeformableGroup);
  serialize_value(&buffer, mGroup);
}

void ModulatedDeformableConvPluginDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}

// Grab the context's cuBLAS handle; the GEMM in the launcher needs it.
void ModulatedDeformableConvPluginDynamic::attachToContext(
    cudnnContext *cudnnContext, cublasContext *cublasContext,
    nvinfer1::IGpuAllocator *gpuAllocator) {
  m_cublas_handle = cublasContext;
}

void ModulatedDeformableConvPluginDynamic::detachFromContext() {}

void ModulatedDeformableConvPluginDynamic::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *ModulatedDeformableConvPluginDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
////////////////////// creator /////////////////////////////
// Registers the attribute names TensorRT may pass to createPlugin.
// NOTE(review): the attribute is registered as "groups" here, but
// createPlugin below matches "group" — confirm which spelling converters
// actually emit.
ModulatedDeformableConvPluginDynamicCreator::
    ModulatedDeformableConvPluginDynamicCreator() {
  mPluginAttributes.emplace_back(nvinfer1::PluginField("stride"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("padding"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("groups"));
  mPluginAttributes.emplace_back(nvinfer1::PluginField("deform_groups"));
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *ModulatedDeformableConvPluginDynamicCreator::getPluginName()
    const {
  return PLUGIN_NAME;
}

const char *ModulatedDeformableConvPluginDynamicCreator::getPluginVersion()
    const {
  return PLUGIN_VERSION;
}

const nvinfer1::PluginFieldCollection *
ModulatedDeformableConvPluginDynamicCreator::getFieldNames() {
  return &mFC;
}
// Builds a plugin from parser-supplied fields. Fields with null data are
// skipped; unrecognized names are ignored. Defaults: stride/dilation 1x1,
// padding 0x0, group = deform_groups = 1.
nvinfer1::IPluginV2 *ModulatedDeformableConvPluginDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  nvinfer1::Dims stride{2, {1, 1}};
  nvinfer1::Dims padding{2, {0, 0}};
  nvinfer1::Dims dilation{2, {1, 1}};
  int deformableGroup = 1;
  int group = 1;
  for (int i = 0; i < fc->nbFields; i++) {
    if (fc->fields[i].data == nullptr) {
      continue;
    }
    std::string field_name(fc->fields[i].name);
    // All attributes of this plugin carry int payloads.
    const int *idata = static_cast<const int *>(fc->fields[i].data);
    if (field_name.compare("deform_groups") == 0) {
      deformableGroup = idata[0];
    }
    // FIX: the creator registers this attribute as "groups" (see the
    // constructor) but the original only matched "group", so a "groups"
    // field could never take effect. Accept both spellings.
    if (field_name.compare("group") == 0 ||
        field_name.compare("groups") == 0) {
      group = idata[0];
    }
    if (field_name.compare("stride") == 0) {
      stride.nbDims = 2;
      stride.d[0] = idata[0];
      stride.d[1] = idata[1];
    }
    if (field_name.compare("padding") == 0) {
      padding.nbDims = 2;
      padding.d[0] = idata[0];
      padding.d[1] = idata[1];
    }
    if (field_name.compare("dilation") == 0) {
      dilation.nbDims = 2;
      dilation.d[0] = idata[0];
      dilation.d[1] = idata[1];
    }
  }
  ModulatedDeformableConvPluginDynamic *plugin =
      new ModulatedDeformableConvPluginDynamic(name, stride, padding,
                                               dilation, deformableGroup,
                                               group);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Rebuilds a plugin from the byte stream written by serialize().
nvinfer1::IPluginV2 *
ModulatedDeformableConvPluginDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  auto plugin =
      new ModulatedDeformableConvPluginDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

void ModulatedDeformableConvPluginDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}

const char *ModulatedDeformableConvPluginDynamicCreator::getPluginNamespace()
    const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include <assert.h>
#include <cuda_fp16.h>
#include "common_cuda_helper.hpp"
#include "modulated_deform_conv_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
// Host-side launcher for the modulated-deformable im2col kernel: expands
// (data_im_, data_offset_, data_mask_) into the column buffer `data_col_`
// of shape [channels * kernel_h * kernel_w, batch_size, height_col, width_col]
// — see modulated_deformable_im2col_gpu_kernel for the exact layout.
// One thread is launched per (channel, batch, output-pixel) triple.
// NOTE(review): `kenerl_w` is a long-standing typo for `kernel_w`; kept as-is
// because the header declaration presumably uses the same spelling.
template <typename T>
void trt_modulated_deformable_im2col(
    const T* data_im_, const T* data_offset_, const T* data_mask_,
    const int batch_size, const int channels, const int height_im,
    const int width_im, const int height_col, const int width_col,
    const int kernel_h, const int kenerl_w, const int pad_h, const int pad_w,
    const int stride_h, const int stride_w, const int dilation_h,
    const int dilation_w, const int deformable_group, T* data_col_,
    cudaStream_t stream) {
  // num_axes should be smaller than block size
  const int channel_per_deformable_group = channels / deformable_group;
  // One logical work item per output column element.
  const int num_kernels = channels * batch_size * height_col * width_col;

  modulated_deformable_im2col_gpu_kernel<T>
      <<<GET_BLOCKS(num_kernels), THREADS_PER_BLOCK, 0, stream>>>(
          num_kernels, data_im_, data_offset_, data_mask_, height_im, width_im,
          kernel_h, kenerl_w, pad_h, pad_w, stride_h, stride_w, dilation_h,
          dilation_w, channel_per_deformable_group, batch_size, channels,
          deformable_group, height_col, width_col, data_col_);
  // Surface launch-configuration errors immediately.
  cudaCheckError();
}
// In-place per-channel bias add over a flat NCHW tensor of n elements.
// step_batch = channel*height*width, step_channel = height*width, so
// (i % step_batch) / step_channel recovers the channel index of element i.
template <typename scalar_t>
__global__ void output_add_bias_kernel(scalar_t* output, const scalar_t* bias,
                                       size_t step_batch, size_t step_channel,
                                       size_t n) {
  CUDA_1D_KERNEL_LOOP(i, n) {
    const size_t channel_idx = (i % step_batch) / step_channel;
    output[i] += bias[channel_idx];
  }
}
// Launches output_add_bias_kernel to add a per-channel bias in-place to an
// NCHW tensor of shape (batch, channel, height, width) on `stream`.
template <typename scalar_t>
static void output_add_bias(scalar_t* output, const scalar_t* bias,
                            size_t batch, size_t channel, size_t height,
                            size_t width, cudaStream_t stream) {
  size_t step_channel = height * width;          // elements per channel plane
  size_t step_batch = step_channel * channel;    // elements per sample
  size_t n = step_batch * batch;                 // total elements
  output_add_bias_kernel<<<GET_BLOCKS(n), THREADS_PER_BLOCK, 0, stream>>>(
      output, bias, step_batch, step_channel, n);
  // Fix: check the launch like every other kernel launch in this file
  // (trt_modulated_deformable_im2col does); previously errors here were
  // silently deferred to the next synchronizing call.
  cudaCheckError();
}
// Modulated deformable convolution forward pass.
// Per sample: im2col-expand (input, offset, mask) into `workspace`, run one
// grouped GEMM per group against `weight`, then optionally add `bias`.
// `workspace` must hold channels * kernel_h * kernel_w * height_out *
// width_out elements of scalar_t (the column buffer for a single sample).
// `bias` may be nullptr (no bias add). All pointers are device pointers;
// all work is enqueued on `stream` / `cublas_handle`.
template <typename scalar_t>
void ModulatedDeformConvForwardCUDAKernelLauncher(
    const scalar_t* input, const scalar_t* weight, const scalar_t* bias,
    const scalar_t* offset, const scalar_t* mask, scalar_t* output,
    void* workspace, int batch, int channels, int height, int width,
    int channels_out, int kernel_w, int kernel_h, int stride_w, int stride_h,
    int pad_w, int pad_h, int dilation_w, int dilation_h, int group,
    int deformable_group, int im2col_step, cublasHandle_t cublas_handle,
    cudaStream_t stream) {
  bool with_bias = (bias != nullptr);
  im2col_step = std::min(int(batch), im2col_step);
  assert(batch % im2col_step == 0);
  // (Removed unused locals `sizeof_dtype` and `channels_kernel`.)

  // Standard convolution output extents.
  const int height_out =
      (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
  const int width_out =
      (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;

  scalar_t* columns = (scalar_t*)workspace;

  // Per-sample strides into the flat input tensors.
  const size_t input_step = channels * height * width;
  // NOTE(review): offset/mask strides use the *input* spatial extent
  // (height * width) rather than height_out * width_out — confirm this
  // matches the offset/mask tensor layout produced by the exporter.
  const size_t offset_step =
      deformable_group * kernel_h * kernel_w * 2 * height * width;
  const size_t mask_step =
      deformable_group * kernel_h * kernel_w * height * width;
  const size_t out_step = channels_out * height_out * width_out;
  const size_t out_group_step = out_step / group;
  const size_t col_g_step =
      channels * kernel_w * kernel_h / group * height_out * width_out;
  const size_t weight_g_step =
      channels_out / group * channels / group * kernel_h * kernel_w;

  // Per-group GEMM extents: (m x k) * (k x n) -> (m x n).
  const int m = channels_out / group;
  const int n = height_out * width_out;
  const int k = channels / group * kernel_h * kernel_w;
  scalar_t alpha = 1.;
  scalar_t beta = 0.;

  for (int b = 0; b < batch; b++) {
    const scalar_t* input_start = input + b * input_step;
    const scalar_t* offset_start = offset + b * offset_step;
    const scalar_t* mask_start = mask + b * mask_step;
    trt_modulated_deformable_im2col<scalar_t>(
        input_start, offset_start, mask_start, 1, channels, height, width,
        height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h,
        stride_w, dilation_h, dilation_w, deformable_group, columns, stream);
    for (int g = 0; g < group; g++) {
      const scalar_t* weight_start = weight + g * weight_g_step;
      scalar_t* col_start = columns + g * col_g_step;
      scalar_t* out_buffer_start = output + b * out_step + g * out_group_step;
      // cuBLAS is column-major; operands are passed swapped so the row-major
      // result lands directly in out_buffer_start. beta == 0 overwrites it.
      cublasGemmWrap<scalar_t>(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k,
                               &alpha, col_start, n, weight_start, k, &beta,
                               out_buffer_start, n);
      cudaCheckError();
    }
  }

  if (with_bias) {
    output_add_bias<scalar_t>(output, bias, batch, channels_out, height_out,
                              width_out, stream);
  }
}
// Non-template entry point used by the plugin's enqueue(): forwards directly
// to the float instantiation of ModulatedDeformConvForwardCUDAKernelLauncher.
// All pointer arguments are device pointers; `bias` may be nullptr.
void ModulatedDeformConvForwardCUDAKernelLauncher_float(
    const float* input, const float* weight, const float* bias,
    const float* offset, const float* mask, float* output, void* workspace,
    int batch, int channels, int height, int width, int channels_out,
    int kernel_w, int kernel_h, int stride_w, int stride_h, int pad_w,
    int pad_h, int dilation_w, int dilation_h, int group, int deformable_group,
    int im2col_step, cublasHandle_t cublas_handle, cudaStream_t stream) {
  ModulatedDeformConvForwardCUDAKernelLauncher<float>(
      input, weight, bias, offset, mask, output, workspace, batch, channels,
      height, width, channels_out, kernel_w, kernel_h, stride_w, stride_h,
      pad_w, pad_h, dilation_w, dilation_h, group, deformable_group,
      im2col_step, cublas_handle, stream);
}
mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include "trt_nms.hpp"
#include <assert.h>
#include <stdio.h>
#include <chrono>
#include "trt_serialize.hpp"
extern
size_t
get_onnxnms_workspace_size
(
size_t
num_batches
,
size_t
spatial_dimension
,
size_t
num_classes
,
size_t
boxes_word_size
,
int
center_point_box
,
size_t
output_length
);
extern
void
TRTNMSCUDAKernelLauncher_float
(
const
float
*
boxes
,
const
float
*
scores
,
const
int
max_output_boxes_per_class
,
const
float
iou_threshold
,
const
float
score_threshold
,
const
int
offset
,
int
*
output
,
int
center_point_box
,
int
num_batches
,
int
spatial_dimension
,
int
num_classes
,
size_t
output_length
,
void
*
workspace
,
cudaStream_t
stream
);
namespace {
// Identity reported to the TensorRT plugin registry; the op name must match
// the custom op emitted during ONNX export.
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"NonMaxSuppression"};
}  // namespace

// Static storage for the creator's field collection; populated once in the
// creator's constructor.
nvinfer1::PluginFieldCollection NonMaxSuppressionDynamicCreator::mFC{};
std::vector<nvinfer1::PluginField>
    NonMaxSuppressionDynamicCreator::mPluginAttributes;
// Attribute constructor used by createPlugin(): stores the ONNX
// NonMaxSuppression attributes verbatim. Initializer order follows the
// member declaration order in the header.
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(const std::string &name,
                                                   int centerPointBox,
                                                   int maxOutputBoxesPerClass,
                                                   float iouThreshold,
                                                   float scoreThreshold,
                                                   int offset)
    : mLayerName(name),
      mCenterPointBox(centerPointBox),
      mMaxOutputBoxesPerClass(maxOutputBoxesPerClass),
      mIouThreshold(iouThreshold),
      mScoreThreshold(scoreThreshold),
      mOffset(offset) {}
// Deserializing constructor: restores the five attributes in exactly the
// order serialize() wrote them (deserialize_value advances `data`/`length`).
// NOTE(review): `name` is taken by value here while the attribute constructor
// takes a const reference — presumably this mirrors the header declaration;
// confirm before unifying.
NonMaxSuppressionDynamic::NonMaxSuppressionDynamic(const std::string name,
                                                   const void *data,
                                                   size_t length)
    : mLayerName(name) {
  deserialize_value(&data, &length, &mCenterPointBox);
  deserialize_value(&data, &length, &mMaxOutputBoxesPerClass);
  deserialize_value(&data, &length, &mIouThreshold);
  deserialize_value(&data, &length, &mScoreThreshold);
  deserialize_value(&data, &length, &mOffset);
}
nvinfer1::IPluginV2DynamicExt *NonMaxSuppressionDynamic::clone() const {
  // Duplicate via the attribute constructor; TensorRT owns the returned
  // object and destroys it through destroy().
  auto *copy = new NonMaxSuppressionDynamic(mLayerName, mCenterPointBox,
                                            mMaxOutputBoxesPerClass,
                                            mIouThreshold, mScoreThreshold,
                                            mOffset);
  copy->setPluginNamespace(getPluginNamespace());
  return copy;
}
// Symbolic output shape: [num_batches * per_class_candidates * num_classes, 3]
// where each row is an index triple (batch_id, class_id, box_id) and
// per_class_candidates is spatial_dimension capped at
// mMaxOutputBoxesPerClass when that attribute is positive.
nvinfer1::DimsExprs NonMaxSuppressionDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) {
  nvinfer1::DimsExprs ret;
  ret.nbDims = 2;
  // boxes input: [num_batches, spatial_dimension, 4]
  auto num_batches = inputs[0].d[0];
  auto spatial_dimension = inputs[0].d[1];
  if (mMaxOutputBoxesPerClass > 0) {
    // Cap per-class candidates at the configured maximum.
    spatial_dimension = exprBuilder.operation(
        nvinfer1::DimensionOperation::kMIN, *spatial_dimension,
        *exprBuilder.constant(mMaxOutputBoxesPerClass));
  }
  // scores input: [num_batches, num_classes, spatial_dimension]
  auto num_classes = inputs[1].d[1];
  ret.d[0] = exprBuilder.operation(
      nvinfer1::DimensionOperation::kPROD, *num_batches,
      *exprBuilder.operation(nvinfer1::DimensionOperation::kPROD,
                             *spatial_dimension, *num_classes));
  ret.d[1] = exprBuilder.constant(3);
  return ret;
}
bool NonMaxSuppressionDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *inOut, int nbInputs,
    int nbOutputs) {
  // Inputs 0 (boxes) and 1 (scores) must be linear float tensors; the single
  // output (selected_indices) must be a linear int32 tensor. Anything else
  // is accepted unconditionally.
  if (pos < nbInputs) {
    if (pos == 0 || pos == 1) {
      return inOut[pos].type == nvinfer1::DataType::kFLOAT &&
             inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
    }
    return true;
  }
  const int out_pos = pos - nbInputs;
  if (out_pos == 0) {
    return inOut[pos].type == nvinfer1::DataType::kINT32 &&
           inOut[pos].format == nvinfer1::TensorFormat::kLINEAR;
  }
  return true;
}
// No shape-dependent state to prepare: all sizing happens per-invocation in
// getWorkspaceSize()/enqueue(), so this callback is intentionally empty.
void NonMaxSuppressionDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc *outputs, int nbOutputs) {}
size_t NonMaxSuppressionDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc *outputs, int nbOutputs) const {
  // Delegate to the kernel-side helper so the workspace layout is defined in
  // exactly one place (trt_nms_kernel.cu).
  const size_t boxes_word_size = mmcv::getElementSize(inputs[0].type);
  const size_t num_batches = inputs[0].dims.d[0];
  const size_t spatial_dimension = inputs[0].dims.d[1];
  const size_t num_classes = inputs[1].dims.d[1];
  const size_t output_length = outputs[0].dims.d[0];

  return get_onnxnms_workspace_size(num_batches, spatial_dimension,
                                    num_classes, boxes_word_size,
                                    mCenterPointBox, output_length);
}
int NonMaxSuppressionDynamic::enqueue(
    const nvinfer1::PluginTensorDesc *inputDesc,
    const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
    void *const *outputs, void *workSpace, cudaStream_t stream) {
  // boxes:  [num_batches, spatial_dimension, 4]
  // scores: [num_batches, num_classes, spatial_dimension]
  // output: [output_length, 3] index triples, -1 padded.
  const int num_batches = inputDesc[0].dims.d[0];
  const int spatial_dimension = inputDesc[0].dims.d[1];
  const int num_classes = inputDesc[1].dims.d[1];
  const int output_length = outputDesc[0].dims.d[0];

  const float *boxes = static_cast<const float *>(inputs[0]);
  const float *scores = static_cast<const float *>(inputs[1]);
  int *output = static_cast<int *>(outputs[0]);

  TRTNMSCUDAKernelLauncher_float(
      boxes, scores, mMaxOutputBoxesPerClass, mIouThreshold, mScoreThreshold,
      mOffset, output, mCenterPointBox, num_batches, spatial_dimension,
      num_classes, output_length, workSpace, stream);
  return 0;
}
// The single output holds integer index triples, so it is always int32
// regardless of the input types.
nvinfer1::DataType NonMaxSuppressionDynamic::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const {
  return nvinfer1::DataType::kINT32;
}
// IPluginV2 Methods
// Registry type string ("NonMaxSuppression").
const char *NonMaxSuppressionDynamic::getPluginType() const {
  return PLUGIN_NAME;
}
// Registry version string ("1").
const char *NonMaxSuppressionDynamic::getPluginVersion() const {
  return PLUGIN_VERSION;
}
// One output tensor: the selected-indices triples.
int NonMaxSuppressionDynamic::getNbOutputs() const { return 1; }
// Nothing to allocate up front; always succeeds.
int NonMaxSuppressionDynamic::initialize() { return 0; }
// initialize() acquires no resources, so there is nothing to release.
void NonMaxSuppressionDynamic::terminate() {}
size_t NonMaxSuppressionDynamic::getSerializationSize() const {
  // Total bytes written by serialize(): the five scalar attributes, in the
  // same order serialize() emits them.
  size_t total = 0;
  total += sizeof(mCenterPointBox);
  total += sizeof(mMaxOutputBoxesPerClass);
  total += sizeof(mIouThreshold);
  total += sizeof(mScoreThreshold);
  total += sizeof(mOffset);
  return total;
}
// Writes the five attributes into `buffer`. The order must match both the
// deserializing constructor and getSerializationSize().
void NonMaxSuppressionDynamic::serialize(void *buffer) const {
  serialize_value(&buffer, mCenterPointBox);
  serialize_value(&buffer, mMaxOutputBoxesPerClass);
  serialize_value(&buffer, mIouThreshold);
  serialize_value(&buffer, mScoreThreshold);
  serialize_value(&buffer, mOffset);
}
// TensorRT-owned lifetime: the engine calls this to free the plugin.
void NonMaxSuppressionDynamic::destroy() {
  // This gets called when the network containing plugin is destroyed
  delete this;
}
// Stores the namespace later returned by getPluginNamespace().
void NonMaxSuppressionDynamic::setPluginNamespace(const char *libNamespace) {
  mNamespace = libNamespace;
}
// Returns the namespace previously set via setPluginNamespace().
const char *NonMaxSuppressionDynamic::getPluginNamespace() const {
  return mNamespace.c_str();
}
////////////////////// creator /////////////////////////////
NonMaxSuppressionDynamicCreator::NonMaxSuppressionDynamicCreator() {
  // Advertise the plugin fields parsed by createPlugin(); the names mirror
  // the ONNX NonMaxSuppression attributes plus the mmcv-specific `offset`.
  mPluginAttributes.clear();
  for (const char *attr :
       {"center_point_box", "max_output_boxes_per_class", "iou_threshold",
        "score_threshold", "offset"}) {
    mPluginAttributes.emplace_back(nvinfer1::PluginField(attr));
  }
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}
// Registry type string; must agree with the plugin's getPluginType().
const char *NonMaxSuppressionDynamicCreator::getPluginName() const {
  return PLUGIN_NAME;
}
// Registry version string; must agree with the plugin's getPluginVersion().
const char *NonMaxSuppressionDynamicCreator::getPluginVersion() const {
  return PLUGIN_VERSION;
}
// Exposes the attribute schema built in the constructor.
const nvinfer1::PluginFieldCollection *
NonMaxSuppressionDynamicCreator::getFieldNames() {
  return &mFC;
}
nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) {
  // Defaults used when an attribute is absent from the field collection.
  int centerPointBox = 0;
  int maxOutputBoxesPerClass = 0;
  float iouThreshold = 0.0f;
  float scoreThreshold = 0.0f;
  int offset = 0;

  for (int i = 0; i < fc->nbFields; ++i) {
    const void *data = fc->fields[i].data;
    if (data == nullptr) {
      continue;
    }
    const std::string field_name(fc->fields[i].name);
    if (field_name == "center_point_box") {
      centerPointBox = static_cast<const int *>(data)[0];
    } else if (field_name == "max_output_boxes_per_class") {
      maxOutputBoxesPerClass = static_cast<const int *>(data)[0];
    } else if (field_name == "iou_threshold") {
      iouThreshold = static_cast<const float *>(data)[0];
    } else if (field_name == "score_threshold") {
      scoreThreshold = static_cast<const float *>(data)[0];
    } else if (field_name == "offset") {
      offset = static_cast<const int *>(data)[0];
    }
  }

  NonMaxSuppressionDynamic *plugin = new NonMaxSuppressionDynamic(
      name, centerPointBox, maxOutputBoxesPerClass, iouThreshold,
      scoreThreshold, offset);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
nvinfer1::IPluginV2 *NonMaxSuppressionDynamicCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) {
  // The deserializing constructor restores all five attributes.
  NonMaxSuppressionDynamic *plugin =
      new NonMaxSuppressionDynamic(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
// Stores the namespace later propagated to created/deserialized plugins.
void NonMaxSuppressionDynamicCreator::setPluginNamespace(
    const char *libNamespace) {
  mNamespace = libNamespace;
}
// Returns the namespace previously set via setPluginNamespace().
const char *NonMaxSuppressionDynamicCreator::getPluginNamespace() const {
  return mNamespace.c_str();
}
mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu
deleted
100644 → 0
View file @
961373ad
// Copyright (c) OpenMMLab. All rights reserved
#include <stdio.h>
#include <thrust/execution_policy.h>
#include <thrust/gather.h>
#include <thrust/sort.h>
#include <thrust/transform.h>
#include <chrono>
#include <thread>
#include <vector>
#include "common_cuda_helper.hpp"
#include "nms_cuda_kernel.cuh"
#include "trt_cuda_helper.cuh"
#include "trt_plugin_helper.hpp"
// Plain 4-float box record so thrust algorithms can move a whole box as a
// single value (used as the element type for transform/gather below).
struct NMSBox {
  float box[4];
};
// Functor: convert a box from (cx, cy, w, h) to (x1, y1, x2, y2).
struct nms_centerwh2xyxy {
  __host__ __device__ NMSBox operator()(const NMSBox box) {
    NMSBox out;
    const float half_w = box.box[2] / 2.0f;
    const float half_h = box.box[3] / 2.0f;
    out.box[0] = box.box[0] - half_w;
    out.box[1] = box.box[1] - half_h;
    out.box[2] = box.box[0] + half_w;
    out.box[3] = box.box[1] + half_h;
    return out;
  }
};
// Functor: replace any box with a fixed "idle" box (used to neutralize boxes
// whose score falls below the threshold before running NMS).
struct nms_sbox_idle {
  const float *idle_box_;
  __host__ __device__ nms_sbox_idle(const float *idle_box)
      : idle_box_(idle_box) {}
  __host__ __device__ NMSBox operator()(const NMSBox box) {
    return {idle_box_[0], idle_box_[1], idle_box_[2], idle_box_[3]};
  }
};
// Predicate: true when a score is strictly below the configured threshold.
struct nms_score_threshold {
  float score_threshold_;
  __host__ __device__ nms_score_threshold(const float score_threshold)
      : score_threshold_(score_threshold) {}
  __host__ __device__ bool operator()(const float score) {
    return score < score_threshold_;
  }
};
// Rewrites the box-id column (third entry of each output triple) from
// sorted-order position back to the original box index via index_cache.
__global__ void nms_reindex_kernel(int n, int *output, int *index_cache) {
  CUDA_1D_KERNEL_LOOP(idx, n) {
    int *box_id = output + idx * 3 + 2;
    *box_id = index_cache[*box_id];
  }
}
// Converts the pairwise suppression bitmask produced by nms_cuda into output
// index triples (batch_id, cls_id, original box index).
// Launch contract: a SINGLE block of threadsPerBlock threads with
// col_blocks * sizeof(unsigned long long) bytes of dynamic shared memory
// (see the launch site in TRTNMSCUDAKernelLauncher_float); the loop over
// candidates is inherently sequential, so only intra-block parallelism is
// used to OR mask words into `remv`.
// `output_count` is a device scalar carrying the running write position
// across successive per-(batch, class) invocations.
__global__ void mask_to_output_kernel(const unsigned long long *dev_mask,
                                      const int *index, int *output,
                                      int *output_count, int batch_id,
                                      int cls_id, int spatial_dimension,
                                      int col_blocks,
                                      int max_output_boxes_per_class) {
  extern __shared__ unsigned long long remv[];
  // fill remv with 0
  CUDA_1D_KERNEL_LOOP(i, col_blocks) { remv[i] = 0; }
  __syncthreads();
  int start = *output_count;
  int out_per_class_count = 0;
  for (int i = 0; i < spatial_dimension; i++) {
    const int nblock = i / threadsPerBlock;
    const int inblock = i % threadsPerBlock;
    // Candidate i survives if no earlier kept box suppressed it. The
    // condition reads shared memory with indices uniform in i, so every
    // thread takes the same branch and the __syncthreads() below are safe.
    if (!(remv[nblock] & (1ULL << inblock))) {
      if (threadIdx.x == 0) {
        // Only thread 0 writes the triple and advances the write cursor.
        output[start * 3 + 0] = batch_id;
        output[start * 3 + 1] = cls_id;
        output[start * 3 + 2] = index[i];
        start += 1;
      }
      out_per_class_count += 1;  // incremented uniformly by all threads
      if (out_per_class_count >= max_output_boxes_per_class) {
        break;  // uniform break: every thread exits together
      }
      __syncthreads();
      // set every overlap box with bit 1 in remv
      const unsigned long long *p = dev_mask + i * col_blocks;
      CUDA_1D_KERNEL_LOOP(j, col_blocks) {
        if (j >= nblock) {
          remv[j] |= p[j];
        }
      }  // j
      __syncthreads();
    }
  }  // i
  if (threadIdx.x == 0) {
    // Publish the advanced cursor for the next (batch, class) invocation.
    *output_count = start;
  }
}
// Total device workspace (bytes) needed by TRTNMSCUDAKernelLauncher_float.
// The buffers summed here must mirror, one for one, the pointer carving in
// that launcher. `num_classes` and `output_length` are currently unused but
// kept for signature stability.
size_t get_onnxnms_workspace_size(size_t num_batches, size_t spatial_dimension,
                                  size_t num_classes, size_t boxes_word_size,
                                  int center_point_box, size_t output_length) {
  size_t total = 0;
  // sorted boxes for one batch/class slice
  total += mmcv::getAlignedSize(spatial_dimension * 4 * boxes_word_size);
  // xyxy-converted copy of every box (only for center-format inputs)
  if (center_point_box == 1) {
    total += mmcv::getAlignedSize(num_batches * spatial_dimension * 4 *
                                  boxes_word_size);
  }
  // scores copy for one batch/class slice
  total += mmcv::getAlignedSize(spatial_dimension * boxes_word_size);
  // pairwise suppression bitmask
  const int col_blocks =
      (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock;
  total += mmcv::getAlignedSize(spatial_dimension * col_blocks *
                                sizeof(unsigned long long));
  // immutable index template plus the per-iteration index cache
  total += mmcv::getAlignedSize(spatial_dimension * sizeof(int));
  total += mmcv::getAlignedSize(spatial_dimension * sizeof(int));
  // running output count
  total += mmcv::getAlignedSize(sizeof(int));
  return total;
}
/**
* Launch the NonMaxSuppression kernel
*
* The NMS will be performed on each batch/class, share the kernel implement
* `nms_cuda`. For each batch/class, the `boxes_sorted` and `index_cache` will
* be sorted by scores, boxes_sorted will be used in `nms_cuda` kernel. After
* that, the output would be generated by `mask_to_output_kernel` with
* `dev_mask` and `sorted_cache`.
*
* @param[in] bboxes with shape [num_batch, spatial_dimension, 4], input boxes
* @param[in] scores with shape [num_batch, num_classes, spatial_dimension],
* input scores
* @param[in] max_output_boxes_per_class max output boxes per class
* @param[in] iou_threshold threshold of iou
* @param[in] score_threshold threshold of scores
* @param[in] offset box offset, only 0 or 1 is valid
* @param[out] output with shape [output_length, 3], each row contain index
* (batch_id, class_id, boxes_id), filling -1 if result is not valid.
* @param[in] center_point_box 0 if boxes is [left, top, right, bottom] 1 if
* boxes is [center_x, center_y, width, height]
* @param[in] num_batches batch size of boxes and scores
* @param[in] spatial_dimension boxes numbers each batch
* @param[in] num_classes class numbers
* @param[in] output_length the max output rows
* @param[in] workspace memory for all temporary variables.
* @param[in] stream cuda stream
*/
// See the doxygen block above for the full parameter contract. The workspace
// is carved sequentially below; the carving order and aligned sizes must stay
// in sync with get_onnxnms_workspace_size().
void TRTNMSCUDAKernelLauncher_float(const float *boxes, const float *scores,
                                    const int max_output_boxes_per_class,
                                    const float iou_threshold,
                                    const float score_threshold,
                                    const int offset, int *output,
                                    int center_point_box, int num_batches,
                                    int spatial_dimension, int num_classes,
                                    size_t output_length, void *workspace,
                                    cudaStream_t stream) {
  const int col_blocks =
      (spatial_dimension + threadsPerBlock - 1) / threadsPerBlock;
  // Scratch for one batch/class worth of boxes, sorted by descending score.
  float *boxes_sorted = (float *)workspace;
  workspace = static_cast<char *>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * 4 * sizeof(float));

  // Optional copy of ALL boxes converted (cx,cy,w,h) -> (x1,y1,x2,y2).
  float *boxes_xyxy = nullptr;
  if (center_point_box == 1) {
    boxes_xyxy = (float *)workspace;
    workspace = static_cast<char *>(workspace) +
                mmcv::getAlignedSize(num_batches * spatial_dimension * 4 *
                                     sizeof(float));
    thrust::transform(thrust::cuda::par.on(stream), (NMSBox *)boxes,
                      (NMSBox *)(boxes + num_batches * spatial_dimension * 4),
                      (NMSBox *)boxes_xyxy, nms_centerwh2xyxy());
    cudaCheckError();
  }

  // Scores for the current batch/class slice (mutated by the sort).
  float *scores_sorted = (float *)workspace;
  workspace = static_cast<char *>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(float));
  // Pairwise suppression bitmask filled by nms_cuda.
  unsigned long long *dev_mask = (unsigned long long *)workspace;
  workspace = static_cast<char *>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * col_blocks *
                                   sizeof(unsigned long long));
  // Box indices reordered alongside the score sort.
  int *index_cache = (int *)workspace;
  workspace = static_cast<char *>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(int));
  // generate sequence [0,1,2,3,4 ....]
  int *index_template = (int *)workspace;
  workspace = static_cast<char *>(workspace) +
              mmcv::getAlignedSize(spatial_dimension * sizeof(int));
  thrust::sequence(thrust::cuda::par.on(stream), index_template,
                   index_template + spatial_dimension, 0);

  // Non-positive max means "no per-class cap": keep every candidate.
  int max_output_boxes_per_class_cpu = max_output_boxes_per_class;
  if (max_output_boxes_per_class_cpu <= 0) {
    max_output_boxes_per_class_cpu = spatial_dimension;
  }

  // Device scalar accumulating the output write position across all
  // (batch, class) pairs.
  int *output_count = (int *)workspace;
  workspace = static_cast<char *>(workspace) + mmcv::getAlignedSize(sizeof(int));
  cudaMemsetAsync(output_count, 0, sizeof(int), stream);

  // fill output with -1
  thrust::fill(thrust::cuda::par.on(stream), output,
               output + output_length * 3, -1);
  cudaCheckError();

  dim3 blocks(col_blocks, col_blocks);
  dim3 threads(threadsPerBlock);

  for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
    for (int cls_id = 0; cls_id < num_classes; ++cls_id) {
      const int batch_cls_id = batch_id * num_classes + cls_id;

      // sort boxes by score
      cudaMemcpyAsync(scores_sorted, scores + batch_cls_id * spatial_dimension,
                      spatial_dimension * sizeof(float),
                      cudaMemcpyDeviceToDevice, stream);
      cudaCheckError();
      cudaMemcpyAsync(index_cache, index_template,
                      spatial_dimension * sizeof(int),
                      cudaMemcpyDeviceToDevice, stream);
      cudaCheckError();
      thrust::sort_by_key(thrust::cuda::par.on(stream), scores_sorted,
                          scores_sorted + spatial_dimension, index_cache,
                          thrust::greater<float>());
      // Gather the boxes into score-sorted order (from the converted copy
      // when the input was center-format).
      if (center_point_box == 1) {
        thrust::gather(thrust::cuda::par.on(stream), index_cache,
                       index_cache + spatial_dimension,
                       (NMSBox *)(boxes_xyxy + batch_id * spatial_dimension * 4),
                       (NMSBox *)boxes_sorted);
      } else {
        thrust::gather(thrust::cuda::par.on(stream), index_cache,
                       index_cache + spatial_dimension,
                       (NMSBox *)(boxes + batch_id * spatial_dimension * 4),
                       (NMSBox *)boxes_sorted);
      }
      cudaCheckError();

      // Neutralize below-threshold boxes by overwriting them with the
      // highest-scoring box so they can never suppress anything new.
      if (score_threshold > 0.0f) {
        thrust::transform_if(
            thrust::cuda::par.on(stream), (NMSBox *)boxes_sorted,
            (NMSBox *)(boxes_sorted + spatial_dimension * 4), scores_sorted,
            (NMSBox *)boxes_sorted, nms_sbox_idle(boxes_sorted),
            nms_score_threshold(score_threshold));
      }

      nms_cuda<<<blocks, threads, 0, stream>>>(spatial_dimension, iou_threshold,
                                               offset, boxes_sorted, dev_mask);

      // will be performed when dev_mask is full.
      mask_to_output_kernel<<<1, threadsPerBlock,
                              col_blocks * sizeof(unsigned long long),
                              stream>>>(
          dev_mask, index_cache, output, output_count, batch_id, cls_id,
          spatial_dimension, col_blocks, max_output_boxes_per_class_cpu);
    }  // cls_id
  }    // batch_id
}
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment