Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
5025ebed
Unverified
Commit
5025ebed
authored
Jul 11, 2025
by
PanZezhong1725
Committed by
GitHub
Jul 11, 2025
Browse files
issue/213/fix 修复cuda conv,关闭cudnn执行时报错 (#320)
parents
d417f967
e3b28d1b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
17 deletions
+48
-17
src/infiniop/ops/conv/cuda/conv_cuda.cu
src/infiniop/ops/conv/cuda/conv_cuda.cu
+5
-0
src/infiniop/ops/conv/cuda/conv_cuda.cuh
src/infiniop/ops/conv/cuda/conv_cuda.cuh
+0
-1
src/infiniop/ops/conv/operator.cc
src/infiniop/ops/conv/operator.cc
+5
-5
test/infiniop/conv.py
test/infiniop/conv.py
+36
-11
test/infiniop/random_sample.py
test/infiniop/random_sample.py
+2
-0
No files found.
src/infiniop/ops/conv/cuda/conv_cuda.cu
View file @
5025ebed
#include "../../../devices/cuda/cuda_common.cuh"
#include "../../../devices/cuda/cuda_handle.cuh"
#include "../../../devices/cuda/cuda_handle.cuh"
#include "conv_cuda.cuh"
#include "conv_cuda.cuh"
#ifdef ENABLE_CUDNN_API
#define DESTROY_CUDNN_DESCRIPTOR(desc_ptr, destroy_func) \
#define DESTROY_CUDNN_DESCRIPTOR(desc_ptr, destroy_func) \
do { \
do { \
if (desc_ptr) { \
if (desc_ptr) { \
...
@@ -427,3 +430,5 @@ infiniStatus_t Descriptor::calculate(
...
@@ -427,3 +430,5 @@ infiniStatus_t Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
return
INFINI_STATUS_SUCCESS
;
}
}
}
// namespace op::conv::cuda
}
// namespace op::conv::cuda
#endif // ENABLE_CUDNN_API
src/infiniop/ops/conv/cuda/conv_cuda.cuh
View file @
5025ebed
#ifndef __CONV_CUDA_CUH__
#ifndef __CONV_CUDA_CUH__
#define __CONV_CUDA_CUH__
#define __CONV_CUDA_CUH__
#include "../../../devices/cuda/cuda_common.cuh"
#include "../conv.h"
#include "../conv.h"
DESCRIPTOR
(
cuda
)
DESCRIPTOR
(
cuda
)
...
...
src/infiniop/ops/conv/operator.cc
View file @
5025ebed
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
#include "cpu/conv_cpu.h"
#include "cpu/conv_cpu.h"
#endif
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/conv_cuda.cuh"
#include "cuda/conv_cuda.cuh"
#endif
#endif
...
@@ -36,7 +36,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
...
@@ -36,7 +36,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#endif
default:
default:
...
@@ -60,7 +60,7 @@ infiniopGetConvWorkspaceSize(
...
@@ -60,7 +60,7 @@ infiniopGetConvWorkspaceSize(
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
);
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
);
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#endif
default:
default:
...
@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopConv(
...
@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopConv(
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#endif
...
@@ -113,7 +113,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
...
@@ -113,7 +113,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
#ifdef ENABLE_CPU_API
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#endif
default:
default:
...
...
test/infiniop/conv.py
View file @
5025ebed
...
@@ -42,7 +42,7 @@ _TEST_CASES = [
...
@@ -42,7 +42,7 @@ _TEST_CASES = [
),
),
(
(
(
1
,
3
,
4
,
4
),
(
1
,
3
,
4
,
4
),
(
48
,
16
,
4
,
1
),
(
48
,
16
,
4
,
1
),
(
2
,
3
,
3
,
3
),
(
2
,
3
,
3
,
3
),
(
27
,
9
,
3
,
1
),
(
27
,
9
,
3
,
1
),
(
1
,
1
),
(
1
,
1
),
...
@@ -85,7 +85,7 @@ _TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16]
...
@@ -85,7 +85,7 @@ _TENSOR_DTYPES = [InfiniDtype.F16, InfiniDtype.F32, InfiniDtype.BF16]
# Tolerance map for different data types
# Tolerance map for different data types
_TOLERANCE_MAP
=
{
_TOLERANCE_MAP
=
{
InfiniDtype
.
F16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-3
},
InfiniDtype
.
F16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-3
},
InfiniDtype
.
F32
:
{
"atol"
:
1e-
6
,
"rtol"
:
1e-
6
},
InfiniDtype
.
F32
:
{
"atol"
:
1e-
5
,
"rtol"
:
1e-
5
},
InfiniDtype
.
BF16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-2
},
InfiniDtype
.
BF16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-2
},
}
}
...
@@ -94,14 +94,27 @@ PROFILE = False
...
@@ -94,14 +94,27 @@ PROFILE = False
NUM_PRERUN
=
10
NUM_PRERUN
=
10
NUM_ITERATIONS
=
1000
NUM_ITERATIONS
=
1000
def
conv
(
x
,
w
,
stride
,
padding
,
dilation
,
y_tensor
,
bias
=
None
):
def
conv
(
x
,
w
,
stride
,
padding
,
dilation
,
y_tensor
,
bias
=
None
):
match
len
(
x
.
shape
)
-
2
:
match
len
(
x
.
shape
)
-
2
:
case
1
:
case
1
:
y_tensor
.
copy_
(
F
.
conv1d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
))
y_tensor
.
copy_
(
F
.
conv1d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
)
)
case
2
:
case
2
:
y_tensor
.
copy_
(
F
.
conv2d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
))
y_tensor
.
copy_
(
F
.
conv2d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
)
)
case
3
:
case
3
:
y_tensor
.
copy_
(
F
.
conv3d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
))
y_tensor
.
copy_
(
F
.
conv3d
(
x
,
w
,
bias
=
bias
,
stride
=
stride
,
padding
=
padding
,
dilation
=
dilation
)
)
case
_
:
case
_
:
print
(
"Error: Pytorch -> Unsupported tensor dimension"
)
print
(
"Error: Pytorch -> Unsupported tensor dimension"
)
...
@@ -163,12 +176,23 @@ def test(
...
@@ -163,12 +176,23 @@ def test(
y_shape
,
y_stride
=
inferShapeStride
(
x_shape
,
w_shape
,
pads
,
strides
,
dilations
)
y_shape
,
y_stride
=
inferShapeStride
(
x_shape
,
w_shape
,
pads
,
strides
,
dilations
)
y
=
TestTensor
(
y_shape
,
y_stride
,
dt
=
tensor_dtype
,
device
=
device
)
y
=
TestTensor
(
y_shape
,
y_stride
,
dt
=
tensor_dtype
,
device
=
device
)
b
=
TestTensor
((
w
.
shape
[
0
],),
(
1
,),
dt
=
tensor_dtype
,
device
=
device
,
scale
=
0.01
)
if
w
.
shape
[
0
]
>
1
else
None
b
=
(
TestTensor
((
w
.
shape
[
0
],),
(
1
,),
dt
=
tensor_dtype
,
device
=
device
,
scale
=
0.01
)
if
w
.
shape
[
0
]
>
1
else
None
)
print
(
print
(
f
"Testing Conv on
{
InfiniDeviceNames
[
device
]
}
with x_shape:
{
x_shape
}
, w_shape:
{
w_shape
}
, b_shape:
{
w_shape
[
0
]
}
, pads:
{
pads
}
, strides:
{
strides
}
, dilations:
{
dilations
}
, x_stride:
{
x_stride
}
dtype:
{
tensor_dtype
}
"
f
"Testing Conv on
{
InfiniDeviceNames
[
device
]
}
with x_shape:
{
x_shape
}
, w_shape:
{
w_shape
}
, b_shape:
{
w_shape
[
0
]
}
, pads:
{
pads
}
, strides:
{
strides
}
, dilations:
{
dilations
}
, x_stride:
{
x_stride
}
dtype:
{
InfiniDtypeNames
[
tensor_dtype
]
}
"
f
"dtype:
{
InfiniDtypeNames
[
tensor_dtype
]
}
"
)
conv
(
x
.
torch_tensor
(),
w
.
torch_tensor
(),
strides
,
pads
,
dilations
,
y
.
torch_tensor
(),
b
.
torch_tensor
()
if
b
is
not
None
else
None
,
)
)
conv
(
x
.
torch_tensor
(),
w
.
torch_tensor
(),
strides
,
pads
,
dilations
,
y
.
torch_tensor
(),
b
.
torch_tensor
()
if
b
is
not
None
else
None
)
if
sync
is
not
None
:
if
sync
is
not
None
:
sync
()
sync
()
...
@@ -196,7 +220,9 @@ def test(
...
@@ -196,7 +220,9 @@ def test(
workspace_size
=
ctypes
.
c_uint64
(
0
)
workspace_size
=
ctypes
.
c_uint64
(
0
)
check_error
(
check_error
(
LIBINFINIOP
.
infiniopGetConvWorkspaceSize
(
descriptor
,
ctypes
.
byref
(
workspace_size
))
LIBINFINIOP
.
infiniopGetConvWorkspaceSize
(
descriptor
,
ctypes
.
byref
(
workspace_size
)
)
)
)
workspace
=
TestWorkspace
(
workspace_size
.
value
,
y
.
device
)
workspace
=
TestWorkspace
(
workspace_size
.
value
,
y
.
device
)
...
@@ -241,4 +267,3 @@ if __name__ == "__main__":
...
@@ -241,4 +267,3 @@ if __name__ == "__main__":
test_operator
(
device
,
test
,
_TEST_CASES
,
_TENSOR_DTYPES
)
test_operator
(
device
,
test
,
_TEST_CASES
,
_TENSOR_DTYPES
)
print
(
"
\033
[92mTest passed!
\033
[0m"
)
print
(
"
\033
[92mTest passed!
\033
[0m"
)
test/infiniop/random_sample.py
View file @
5025ebed
...
@@ -99,6 +99,8 @@ def test(
...
@@ -99,6 +99,8 @@ def test(
ans
=
random_sample
(
ans
=
random_sample
(
logits
.
torch_tensor
(),
random_val
,
topp
,
topk
,
voc
,
temperature
logits
.
torch_tensor
(),
random_val
,
topp
,
topk
,
voc
,
temperature
).
to
(
torch
.
int32
)
# 这个函数在device速度可能会很慢,可以通过data.to("cpu")方式加快计算过程
)
# 这个函数在device速度可能会很慢,可以通过data.to("cpu")方式加快计算过程
indices
=
TestTensor
([],
None
,
InfiniDtype
.
I32
,
device
,
mode
=
"zeros"
)
indices
=
TestTensor
([],
None
,
InfiniDtype
.
I32
,
device
,
mode
=
"zeros"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment