Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
c6a3e4c7
Unverified
Commit
c6a3e4c7
authored
Jul 10, 2025
by
PanZezhong1725
Committed by
GitHub
Jul 10, 2025
Browse files
Merge pull request #315 from YdrMaster/main
issue/314 修复 nvidia 上编译和测试问题
parents
f3a075b7
68107e5e
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
79 additions
and
73 deletions
+79
-73
src/infiniccl/cuda/infiniccl_cuda.h
src/infiniccl/cuda/infiniccl_cuda.h
+1
-1
src/infiniop/devices/cuda/cuda_common.cu
src/infiniop/devices/cuda/cuda_common.cu
+4
-0
src/infiniop/devices/cuda/cuda_common.cuh
src/infiniop/devices/cuda/cuda_common.cuh
+2
-0
src/infiniop/devices/cuda/cuda_handle.cuh
src/infiniop/devices/cuda/cuda_handle.cuh
+8
-1
src/infiniop/devices/cuda/cuda_kernel_common.cuh
src/infiniop/devices/cuda/cuda_kernel_common.cuh
+1
-1
src/infiniop/devices/handle.cc
src/infiniop/devices/handle.cc
+3
-3
src/infiniop/ops/add/operator.cc
src/infiniop/ops/add/operator.cc
+5
-5
src/infiniop/ops/causal_softmax/cuda/causal_softmax_kernel.cuh
...nfiniop/ops/causal_softmax/cuda/causal_softmax_kernel.cuh
+1
-1
src/infiniop/ops/causal_softmax/operator.cc
src/infiniop/ops/causal_softmax/operator.cc
+5
-5
src/infiniop/ops/clip/operator.cc
src/infiniop/ops/clip/operator.cc
+5
-5
src/infiniop/ops/gemm/operator.cc
src/infiniop/ops/gemm/operator.cc
+5
-5
src/infiniop/ops/mul/operator.cc
src/infiniop/ops/mul/operator.cc
+5
-5
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+5
-5
src/infiniop/ops/rearrange/operator.cc
src/infiniop/ops/rearrange/operator.cc
+4
-4
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+5
-5
src/infiniop/ops/rope/operator.cc
src/infiniop/ops/rope/operator.cc
+8
-15
src/infiniop/ops/sub/operator.cc
src/infiniop/ops/sub/operator.cc
+5
-5
src/infiniop/ops/swiglu/operator.cc
src/infiniop/ops/swiglu/operator.cc
+5
-5
src/infinirt/cuda/infinirt_cuda.cuh
src/infinirt/cuda/infinirt_cuda.cuh
+1
-1
test/infiniop/rope.py
test/infiniop/rope.py
+1
-1
No files found.
src/infiniccl/cuda/infiniccl_cuda.h
View file @
c6a3e4c7
...
...
@@ -4,7 +4,7 @@
#include "../infiniccl_impl.h"
// Windows does not support CUDA
#if defined(ENABLE_
CUD
A_API) && defined(ENABLE_CCL) && !defined(_WIN32)
#if defined(ENABLE_
NVIDI
A_API) && defined(ENABLE_CCL) && !defined(_WIN32)
INFINICCL_DEVICE_API_IMPL
(
cuda
)
#else
INFINICCL_DEVICE_API_NOOP
(
cuda
)
...
...
src/infiniop/devices/cuda/cuda_common.cu
View file @
c6a3e4c7
...
...
@@ -34,6 +34,7 @@ infiniStatus_t Handle::Internal::useCublas(cudaStream_t stream, const Fn<cublasH
return
INFINI_STATUS_SUCCESS
;
}
#ifdef ENABLE_CUDNN_API
infiniStatus_t
Handle
::
Internal
::
useCudnn
(
cudaStream_t
stream
,
const
Fn
<
cudnnHandle_t
>
&
f
)
const
{
auto
handle
=
dnn_handles
.
pop
();
if
(
!
handle
)
{
...
...
@@ -44,6 +45,7 @@ infiniStatus_t Handle::Internal::useCudnn(cudaStream_t stream, const Fn<cudnnHan
dnn_handles
.
push
(
std
::
move
(
*
handle
));
return
INFINI_STATUS_SUCCESS
;
}
#endif
int
Handle
::
Internal
::
warpSize
()
const
{
return
_warp_size
;
}
int
Handle
::
Internal
::
maxThreadsPerBlock
()
const
{
return
_max_threads_per_block
;
}
...
...
@@ -54,6 +56,7 @@ int Handle::Internal::gridSizeX() const { return _grid_size[0]; }
int
Handle
::
Internal
::
gridSizeY
()
const
{
return
_grid_size
[
1
];
}
int
Handle
::
Internal
::
gridSizeZ
()
const
{
return
_grid_size
[
2
];
}
#ifdef ENABLE_CUDNN_API
cudnnDataType_t
getCudnnDtype
(
infiniDtype_t
dt
)
{
switch
(
dt
)
{
case
INFINI_DTYPE_F16
:
...
...
@@ -78,6 +81,7 @@ cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
return
CUDNN_DATA_FLOAT
;
}
}
#endif
namespace
nvidia
{
...
...
src/infiniop/devices/cuda/cuda_common.cuh
View file @
c6a3e4c7
...
...
@@ -6,7 +6,9 @@
namespace
device
::
cuda
{
#ifdef ENABLE_CUDNN_API
cudnnDataType_t
getCudnnDtype
(
infiniDtype_t
dt
);
#endif
}
// namespace device::cuda
...
...
src/infiniop/devices/cuda/cuda_handle.cuh
View file @
c6a3e4c7
...
...
@@ -5,9 +5,12 @@
#include "../pool.h"
#include "cuda_handle.h"
#include <cublas_v2.h>
#include <cudnn.h>
#include <functional>
#ifdef ENABLE_CUDNN_API
#include <cudnn.h>
#endif
#define CHECK_CUBLAS(API) CHECK_INTERNAL(API, CUBLAS_STATUS_SUCCESS)
#define CHECK_CUDNN(API) CHECK_INTERNAL(API, CUDNN_STATUS_SUCCESS)
...
...
@@ -15,7 +18,9 @@ namespace device::cuda {
class
Handle
::
Internal
{
Pool
<
cublasHandle_t
>
blas_handles
;
#ifdef ENABLE_CUDNN_API
Pool
<
cudnnHandle_t
>
dnn_handles
;
#endif
int
_warp_size
,
_max_threads_per_block
,
...
...
@@ -29,7 +34,9 @@ public:
Internal
(
int
);
infiniStatus_t
useCublas
(
cudaStream_t
stream
,
const
Fn
<
cublasHandle_t
>
&
f
)
const
;
#ifdef ENABLE_CUDNN_API
infiniStatus_t
useCudnn
(
cudaStream_t
stream
,
const
Fn
<
cudnnHandle_t
>
&
f
)
const
;
#endif
int
warpSize
()
const
;
int
maxThreadsPerBlock
()
const
;
...
...
src/infiniop/devices/cuda/cuda_kernel_common.cuh
View file @
c6a3e4c7
...
...
@@ -45,7 +45,7 @@ indexToOffset(
}
}
// namespace device::cuda
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include <cuda_fp16.h>
__forceinline__
__device__
float
exp_
(
const
float
val
)
{
...
...
src/infiniop/devices/handle.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/cpu_handle.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/cuda_handle.h"
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -41,7 +41,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
::
nvidia
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -78,7 +78,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
::
nvidia
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
src/infiniop/ops/add/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/add_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/add_cuda.cuh"
#endif
...
...
@@ -30,7 +30,7 @@ __C infiniStatus_t infiniopCreateAddDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -52,7 +52,7 @@ __C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, siz
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
default:
...
...
@@ -82,7 +82,7 @@ __C infiniStatus_t infiniopAdd(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -106,7 +106,7 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
src/infiniop/ops/causal_softmax/cuda/causal_softmax_kernel.cuh
View file @
c6a3e4c7
...
...
@@ -32,7 +32,7 @@ INFINIOP_CUDA_KERNEL causalSoftmax(
// 2 | * * * ... * * * |
// height: 3 col_id->
if
(
width
+
blockIdx
.
x
>=
threadIdx
.
x
+
height
)
{
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
y
[
col
]
=
exp_
(
x
[
col
]
-
max_
);
#else
y
[
col
]
=
exp
(
x
[
col
]
-
max_
);
...
...
src/infiniop/ops/causal_softmax/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/causal_softmax_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/causal_softmax_cuda.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -33,7 +33,7 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -73,7 +73,7 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_CAMBRICON_MLU
...
...
@@ -119,7 +119,7 @@ __C infiniStatus_t infiniopCausalSoftmax(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -159,7 +159,7 @@ __C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxD
#ifdef ENABLE_CPU_API
DESTROY
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DESTROY
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_METAX_API
...
...
src/infiniop/ops/clip/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/clip_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/clip_cuda.cuh"
#endif
...
...
@@ -30,7 +30,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -52,7 +52,7 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
}
...
...
@@ -82,7 +82,7 @@ __C infiniStatus_t infiniopClip(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -106,7 +106,7 @@ infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
src/infiniop/ops/gemm/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/gemm_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/gemm_cuda.cuh"
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -45,7 +45,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -87,7 +87,7 @@ infiniopGetGemmWorkspaceSize(
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -136,7 +136,7 @@ __C infiniStatus_t infiniopGemm(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
@@ -175,7 +175,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_CAMBRICON_API
...
...
src/infiniop/ops/mul/operator.cc
View file @
c6a3e4c7
...
...
@@ -6,7 +6,7 @@
#include "cpu/mul_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/mul_cuda.cuh"
#endif
...
...
@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateMulDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -53,7 +53,7 @@ __C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, siz
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
default:
...
...
@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopMul(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -107,7 +107,7 @@ infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
src/infiniop/ops/random_sample/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/random_sample_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/random_sample_cuda.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -35,7 +35,7 @@ infiniopCreateRandomSampleDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -68,7 +68,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -111,7 +111,7 @@ __C infiniStatus_t infiniopRandomSample(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -141,7 +141,7 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
src/infiniop/ops/rearrange/operator.cc
View file @
c6a3e4c7
...
...
@@ -9,7 +9,7 @@
#include "ascend/rearrange_ascend.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/rearrange_cuda.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -39,7 +39,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
CREATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -72,7 +72,7 @@ __C infiniStatus_t infiniopRearrange(
CALCULATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -103,7 +103,7 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor(
DELETE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
src/infiniop/ops/rms_norm/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/rms_norm_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/rms_norm_cuda.cuh"
#endif
#ifdef ENABLE_ASCEND_API
...
...
@@ -43,7 +43,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -81,7 +81,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -120,7 +120,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -158,7 +158,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t
#ifdef ENABLE_CPU_API
DESTROY
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DESTROY
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
...
...
src/infiniop/ops/rope/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/rope_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/rope_cuda.cuh"
#endif
#ifdef ENABLE_ASCEND_API
...
...
@@ -39,12 +39,15 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
maca
);
#endif
#ifdef ENABLE_ASCEND_API
CREATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRoPEDescriptor
((
BangHandle_t
)
handle
,
...
...
@@ -52,16 +55,6 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor(
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_ASCEND_API
CREATE
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRoPEDescriptor
((
MacaHandle_t
)
handle
,
(
RoPEMacaDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateRoPEDescriptor
((
MusaHandle_t
)
handle
,
...
...
@@ -87,7 +80,7 @@ __C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -138,7 +131,7 @@ __C infiniStatus_t infiniopRoPE(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -184,7 +177,7 @@ infiniopDestroyRoPEDescriptor(infiniopRoPEDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_METAX_API
...
...
src/infiniop/ops/sub/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/sub_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/sub_cuda.cuh"
#endif
...
...
@@ -30,7 +30,7 @@ __C infiniStatus_t infiniopCreateSubDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -52,7 +52,7 @@ __C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, siz
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
default:
...
...
@@ -82,7 +82,7 @@ __C infiniStatus_t infiniopSub(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
@@ -106,7 +106,7 @@ infiniopDestroySubDescriptor(infiniopSubDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
...
...
src/infiniop/ops/swiglu/operator.cc
View file @
c6a3e4c7
...
...
@@ -5,7 +5,7 @@
#ifdef ENABLE_CPU_API
#include "cpu/swiglu_cpu.h"
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
#include "cuda/swiglu_cuda.cuh"
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -39,7 +39,7 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -89,7 +89,7 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopSwiGLU(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
...
...
@@ -188,7 +188,7 @@ infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
...
...
src/infinirt/cuda/infinirt_cuda.cuh
View file @
c6a3e4c7
...
...
@@ -3,7 +3,7 @@
#include "../infinirt_impl.h"
namespace
infinirt
::
cuda
{
#ifdef ENABLE_
CUD
A_API
#ifdef ENABLE_
NVIDI
A_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
...
...
test/infiniop/rope.py
View file @
c6a3e4c7
...
...
@@ -94,7 +94,7 @@ def rotary_embedding(ans, t, sin, cos, device):
def
sin_cos_table
(
pos
,
dim
,
device
,
theta
,
dtype
):
assert
dim
%
2
==
0
,
"Embedding dimension must be even."
freqs
=
1.0
/
(
theta
**
(
torch
.
arange
(
0
,
dim
,
2
)[:
(
dim
//
2
)].
float
()
/
dim
))
angles
=
torch
.
outer
(
pos
,
freqs
)
angles
=
torch
.
outer
(
pos
.
cpu
()
,
freqs
)
return
(
TestTensor
.
from_torch
(
torch
.
sin
(
angles
),
dtype
,
device
),
TestTensor
.
from_torch
(
torch
.
cos
(
angles
),
dtype
,
device
),
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment