Unverified Commit 5025ebed authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

issue/213/fix 修复cuda conv,关闭cudnn执行时报错 (#320)

parents d417f967 e3b28d1b
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/cuda/cuda_handle.cuh" #include "../../../devices/cuda/cuda_handle.cuh"
#include "conv_cuda.cuh" #include "conv_cuda.cuh"
#ifdef ENABLE_CUDNN_API
#define DESTROY_CUDNN_DESCRIPTOR(desc_ptr, destroy_func) \ #define DESTROY_CUDNN_DESCRIPTOR(desc_ptr, destroy_func) \
do { \ do { \
if (desc_ptr) { \ if (desc_ptr) { \
...@@ -427,3 +430,5 @@ infiniStatus_t Descriptor::calculate( ...@@ -427,3 +430,5 @@ infiniStatus_t Descriptor::calculate(
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace op::conv::cuda } // namespace op::conv::cuda
#endif // ENABLE_CUDNN_API
#ifndef __CONV_CUDA_CUH__ #ifndef __CONV_CUDA_CUH__
#define __CONV_CUDA_CUH__ #define __CONV_CUDA_CUH__
#include "../../../devices/cuda/cuda_common.cuh"
#include "../conv.h" #include "../conv.h"
DESCRIPTOR(cuda) DESCRIPTOR(cuda)
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
#include "cpu/conv_cpu.h" #include "cpu/conv_cpu.h"
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/conv_cuda.cuh" #include "cuda/conv_cuda.cuh"
#endif #endif
...@@ -36,7 +36,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle ...@@ -36,7 +36,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
CREATE(INFINI_DEVICE_CPU, cpu); CREATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, cuda);
#endif #endif
default: default:
...@@ -60,7 +60,7 @@ infiniopGetConvWorkspaceSize( ...@@ -60,7 +60,7 @@ infiniopGetConvWorkspaceSize(
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
GET(INFINI_DEVICE_CPU, cpu); GET(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda); GET(INFINI_DEVICE_NVIDIA, cuda);
#endif #endif
default: default:
...@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopConv( ...@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopConv(
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
CALCULATE(INFINI_DEVICE_CPU, cpu); CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
#endif #endif
...@@ -113,7 +113,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) { ...@@ -113,7 +113,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
#ifdef ENABLE_CPU_API #ifdef ENABLE_CPU_API
DELETE(INFINI_DEVICE_CPU, cpu); DELETE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_CUDA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, cuda);
#endif #endif
default: default:
......
...@@ -42,7 +42,7 @@ _TEST_CASES = [ ...@@ -42,7 +42,7 @@ _TEST_CASES = [
), ),
( (
(1, 3, 4, 4), (1, 3, 4, 4),
(48, 16, 4, 1), (48, 16, 4, 1),
(2, 3, 3, 3), (2, 3, 3, 3),
(27, 9, 3, 1), (27, 9, 3, 1),
(1, 1), (1, 1),
...@@ -85,7 +85,7 @@ _TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16] ...@@ -85,7 +85,7 @@ _TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16]
# Tolerance map for different data types # Tolerance map for different data types
_TOLERANCE_MAP = { _TOLERANCE_MAP = {
InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3}, InfiniDtype.F16: {"atol": 1e-3, "rtol": 1e-3},
InfiniDtype.F32: {"atol": 1e-6, "rtol": 1e-6}, InfiniDtype.F32: {"atol": 1e-5, "rtol": 1e-5},
InfiniDtype.BF16: {"atol": 1e-3, "rtol": 1e-2}, InfiniDtype.BF16: {"atol": 1e-3, "rtol": 1e-2},
} }
...@@ -94,14 +94,27 @@ PROFILE = False ...@@ -94,14 +94,27 @@ PROFILE = False
NUM_PRERUN = 10 NUM_PRERUN = 10
NUM_ITERATIONS = 1000 NUM_ITERATIONS = 1000
def conv(x, w, stride, padding, dilation, y_tensor, bias=None): def conv(x, w, stride, padding, dilation, y_tensor, bias=None):
match len(x.shape) - 2: match len(x.shape) - 2:
case 1: case 1:
y_tensor.copy_(F.conv1d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation)) y_tensor.copy_(
F.conv1d(
x, w, bias=bias, stride=stride, padding=padding, dilation=dilation
)
)
case 2: case 2:
y_tensor.copy_(F.conv2d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation)) y_tensor.copy_(
F.conv2d(
x, w, bias=bias, stride=stride, padding=padding, dilation=dilation
)
)
case 3: case 3:
y_tensor.copy_(F.conv3d(x, w, bias=bias, stride=stride, padding=padding, dilation=dilation)) y_tensor.copy_(
F.conv3d(
x, w, bias=bias, stride=stride, padding=padding, dilation=dilation
)
)
case _: case _:
print("Error: Pytorch -> Unsupported tensor dimension") print("Error: Pytorch -> Unsupported tensor dimension")
...@@ -163,12 +176,23 @@ def test( ...@@ -163,12 +176,23 @@ def test(
y_shape, y_stride = inferShapeStride(x_shape, w_shape, pads, strides, dilations) y_shape, y_stride = inferShapeStride(x_shape, w_shape, pads, strides, dilations)
y = TestTensor(y_shape, y_stride, dt=tensor_dtype, device=device) y = TestTensor(y_shape, y_stride, dt=tensor_dtype, device=device)
b = TestTensor((w.shape[0],), (1,), dt=tensor_dtype, device=device, scale=0.01) if w.shape[0] > 1 else None b = (
TestTensor((w.shape[0],), (1,), dt=tensor_dtype, device=device, scale=0.01)
if w.shape[0] > 1
else None
)
print( print(
f"Testing Conv on {InfiniDeviceNames[device]} with x_shape: {x_shape}, w_shape: {w_shape}, b_shape: {w_shape[0]}, pads: {pads}, strides: {strides}, dilations: {dilations}, x_stride: {x_stride} dtype:{tensor_dtype}" f"Testing Conv on {InfiniDeviceNames[device]} with x_shape: {x_shape}, w_shape: {w_shape}, b_shape: {w_shape[0]}, pads: {pads}, strides: {strides}, dilations: {dilations}, x_stride: {x_stride} dtype:{InfiniDtypeNames[tensor_dtype]}"
f"dtype:{InfiniDtypeNames[tensor_dtype]}" )
conv(
x.torch_tensor(),
w.torch_tensor(),
strides,
pads,
dilations,
y.torch_tensor(),
b.torch_tensor() if b is not None else None,
) )
conv(x.torch_tensor(), w.torch_tensor(), strides, pads, dilations, y.torch_tensor(), b.torch_tensor() if b is not None else None)
if sync is not None: if sync is not None:
sync() sync()
...@@ -196,7 +220,9 @@ def test( ...@@ -196,7 +220,9 @@ def test(
workspace_size = ctypes.c_uint64(0) workspace_size = ctypes.c_uint64(0)
check_error( check_error(
LIBINFINIOP.infiniopGetConvWorkspaceSize(descriptor, ctypes.byref(workspace_size)) LIBINFINIOP.infiniopGetConvWorkspaceSize(
descriptor, ctypes.byref(workspace_size)
)
) )
workspace = TestWorkspace(workspace_size.value, y.device) workspace = TestWorkspace(workspace_size.value, y.device)
...@@ -241,4 +267,3 @@ if __name__ == "__main__": ...@@ -241,4 +267,3 @@ if __name__ == "__main__":
test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES) test_operator(device, test, _TEST_CASES, _TENSOR_DTYPES)
print("\033[92mTest passed!\033[0m") print("\033[92mTest passed!\033[0m")
...@@ -99,6 +99,8 @@ def test( ...@@ -99,6 +99,8 @@ def test(
ans = random_sample( ans = random_sample(
logits.torch_tensor(), random_val, topp, topk, voc, temperature logits.torch_tensor(), random_val, topp, topk, voc, temperature
).to(
torch.int32
) # 这个函数在device速度可能会很慢,可以通过data.to("cpu")方式加快计算过程 ) # 这个函数在device速度可能会很慢,可以通过data.to("cpu")方式加快计算过程
indices = TestTensor([], None, InfiniDtype.I32, device, mode="zeros") indices = TestTensor([], None, InfiniDtype.I32, device, mode="zeros")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment