Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
ceb57c2a
Commit
ceb57c2a
authored
Jul 14, 2025
by
YdrMaster
Browse files
issue/291/style: 根据实际情况将 cuda 改为 nvidia
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
d76a2607
Changes
49
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
62 additions
and
62 deletions
+62
-62
src/infiniop/ops/conv/nvidia/conv_nvidia.cuh
src/infiniop/ops/conv/nvidia/conv_nvidia.cuh
+1
-1
src/infiniop/ops/conv/operator.cc
src/infiniop/ops/conv/operator.cc
+5
-5
src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu
src/infiniop/ops/gemm/nvidia/gemm_nvidia.cu
+6
-6
src/infiniop/ops/gemm/nvidia/gemm_nvidia.cuh
src/infiniop/ops/gemm/nvidia/gemm_nvidia.cuh
+1
-1
src/infiniop/ops/gemm/operator.cc
src/infiniop/ops/gemm/operator.cc
+5
-5
src/infiniop/ops/mul/cpu/mul_cpu.h
src/infiniop/ops/mul/cpu/mul_cpu.h
+1
-1
src/infiniop/ops/mul/metax/mul_metax.h
src/infiniop/ops/mul/metax/mul_metax.h
+1
-1
src/infiniop/ops/mul/nvidia/mul_nvidia.cu
src/infiniop/ops/mul/nvidia/mul_nvidia.cu
+2
-2
src/infiniop/ops/mul/nvidia/mul_nvidia.cuh
src/infiniop/ops/mul/nvidia/mul_nvidia.cuh
+2
-2
src/infiniop/ops/random_sample/nvidia/random_sample_kernel.cuh
...nfiniop/ops/random_sample/nvidia/random_sample_kernel.cuh
+3
-3
src/infiniop/ops/random_sample/nvidia/random_sample_nvidia.cu
...infiniop/ops/random_sample/nvidia/random_sample_nvidia.cu
+6
-6
src/infiniop/ops/random_sample/nvidia/random_sample_nvidia.cuh
...nfiniop/ops/random_sample/nvidia/random_sample_nvidia.cuh
+1
-1
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+5
-5
src/infiniop/ops/rearrange/nvidia/rearrange_kernel.cuh
src/infiniop/ops/rearrange/nvidia/rearrange_kernel.cuh
+1
-1
src/infiniop/ops/rearrange/nvidia/rearrange_nvidia.cu
src/infiniop/ops/rearrange/nvidia/rearrange_nvidia.cu
+7
-7
src/infiniop/ops/rearrange/nvidia/rearrange_nvidia.cuh
src/infiniop/ops/rearrange/nvidia/rearrange_nvidia.cuh
+1
-1
src/infiniop/ops/rearrange/operator.cc
src/infiniop/ops/rearrange/operator.cc
+4
-4
src/infiniop/ops/relu/cpu/relu_cpu.h
src/infiniop/ops/relu/cpu/relu_cpu.h
+1
-1
src/infiniop/ops/rms_norm/nvidia/rms_norm_nvidia.cu
src/infiniop/ops/rms_norm/nvidia/rms_norm_nvidia.cu
+4
-4
src/infiniop/ops/rope/nvidia/rope_nvidia.cu
src/infiniop/ops/rope/nvidia/rope_nvidia.cu
+5
-5
No files found.
src/infiniop/ops/conv/
cud
a/conv_
cud
a.cuh
→
src/infiniop/ops/conv/
nvidi
a/conv_
nvidi
a.cuh
View file @
ceb57c2a
...
...
@@ -3,6 +3,6 @@
#include "../conv.h"
DESCRIPTOR
(
cud
a
)
DESCRIPTOR
(
nvidi
a
)
#endif // __GEMM_CUDA_CUH__
src/infiniop/ops/conv/operator.cc
View file @
ceb57c2a
...
...
@@ -6,7 +6,7 @@
#include "cpu/conv_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "
cud
a/conv_
cud
a.cuh"
#include "
nvidi
a/conv_
nvidi
a.cuh"
#endif
__C
__export
infiniStatus_t
infiniopCreateConvDescriptor
(
infiniopHandle_t
handle
,
...
...
@@ -37,7 +37,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -61,7 +61,7 @@ infiniopGetConvWorkspaceSize(
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
GET
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -93,7 +93,7 @@ __C infiniStatus_t infiniopConv(
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
default:
...
...
@@ -114,7 +114,7 @@ infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/gemm/
cud
a/gemm_
cud
a.cu
→
src/infiniop/ops/gemm/
nvidi
a/gemm_
nvidi
a.cu
View file @
ceb57c2a
#include "../../../devices/
cuda/cud
a_handle.cuh"
#include "gemm_
cud
a.cuh"
#include "../../../devices/
nvidia/nvidi
a_handle.cuh"
#include "gemm_
nvidi
a.cuh"
namespace
op
::
gemm
::
cud
a
{
namespace
op
::
gemm
::
nvidi
a
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cud
a
::
Handle
::
Internal
>
internal
;
std
::
shared_ptr
<
device
::
nvidi
a
::
Handle
::
Internal
>
internal
;
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -17,7 +17,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
auto
handle
=
reinterpret_cast
<
device
::
cuda
::
nvidia
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidia
::
Handle
*>
(
handle_
);
auto
dtype
=
c_desc
->
dtype
();
CHECK_DTYPE
(
dtype
,
INFINI_DTYPE_F16
,
INFINI_DTYPE_F32
,
INFINI_DTYPE_BF16
);
...
...
@@ -121,4 +121,4 @@ infiniStatus_t Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::gemm::
cud
a
}
// namespace op::gemm::
nvidi
a
src/infiniop/ops/gemm/
cud
a/gemm_
cud
a.cuh
→
src/infiniop/ops/gemm/
nvidi
a/gemm_
nvidi
a.cuh
View file @
ceb57c2a
...
...
@@ -3,6 +3,6 @@
#include "../gemm.h"
DESCRIPTOR
(
cud
a
)
DESCRIPTOR
(
nvidi
a
)
#endif // __GEMM_CUDA_CUH__
src/infiniop/ops/gemm/operator.cc
View file @
ceb57c2a
...
...
@@ -6,7 +6,7 @@
#include "cpu/gemm_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "
cud
a/gemm_
cud
a.cuh"
#include "
nvidi
a/gemm_
nvidi
a.cuh"
#endif
#ifdef ENABLE_CAMBRICON_API
#include "bang/gemm_bang.h"
...
...
@@ -46,7 +46,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor(
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_CAMBRICON_API
CREATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
...
...
@@ -88,7 +88,7 @@ infiniopGetGemmWorkspaceSize(
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
GET
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_CAMBRICON_API
GET
(
INFINI_DEVICE_CAMBRICON
,
bang
);
...
...
@@ -137,7 +137,7 @@ __C infiniStatus_t infiniopGemm(
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_CAMBRICON_API
CALCULATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
...
...
@@ -176,7 +176,7 @@ infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) {
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_CAMBRICON_API
DELETE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
...
...
src/infiniop/ops/mul/cpu/mul_cpu.h
View file @
ceb57c2a
...
...
@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR
(
mul
,
cpu
,
cpu
)
ELEMENTWISE_DESCRIPTOR
(
mul
,
cpu
)
namespace
op
::
mul
::
cpu
{
typedef
struct
MulOp
{
...
...
src/infiniop/ops/mul/metax/mul_metax.h
View file @
ceb57c2a
...
...
@@ -3,6 +3,6 @@
#include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR
(
mul
,
metax
,
metax
)
ELEMENTWISE_DESCRIPTOR
(
mul
,
metax
)
#endif // __MUL_METAX_API_H__
src/infiniop/ops/mul/nvidia/mul_nvidia.cu
View file @
ceb57c2a
#include "../../../elementwise/
cud
a/elementwise_
cud
a.cuh"
#include "../../../elementwise/
nvidi
a/elementwise_
nvidi
a.cuh"
#include "../cuda/kernel.cuh"
#include "mul_nvidia.cuh"
...
...
@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
out_desc
,
std
::
vector
<
infiniopTensorDescriptor_t
>
input_desc_vec
)
{
auto
handle
=
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle_
);
auto
dtype
=
out_desc
->
dtype
();
const
auto
&
a_desc
=
input_desc_vec
.
at
(
0
);
...
...
src/infiniop/ops/mul/nvidia/mul_nvidia.cuh
View file @
ceb57c2a
#ifndef __MUL_CUDA_API_H__
#define __MUL_CUDA_API_H__
#include "../../../elementwise/
cud
a/elementwise_
cud
a_api.cuh"
#include "../../../elementwise/
nvidi
a/elementwise_
nvidi
a_api.cuh"
ELEMENTWISE_DESCRIPTOR
(
mul
,
nvidia
,
cuda
)
ELEMENTWISE_DESCRIPTOR
(
mul
,
nvidia
)
#endif // __MUL_CUDA_API_H__
src/infiniop/ops/random_sample/
cud
a/random_sample_kernel.cuh
→
src/infiniop/ops/random_sample/
nvidi
a/random_sample_kernel.cuh
View file @
ceb57c2a
#
include
"../../../devices/
cuda/cud
a_kernel_common.cuh"
#
include
"../../../devices/
nvidia/nvidi
a_kernel_common.cuh"
#include "infinicore.h"
#include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_reduce.cuh>
#include <cub/device/device_scan.cuh>
namespace
op
::
random_sample
::
cud
a
{
namespace
op
::
random_sample
::
nvidi
a
{
// ↓↓↓ 重新封装 cub api,减少模板参数,方便调用
...
...
@@ -256,4 +256,4 @@ struct Algo {
}
};
}
// namespace op::random_sample::
cud
a
}
// namespace op::random_sample::
nvidi
a
src/infiniop/ops/random_sample/
cud
a/random_sample_
cud
a.cu
→
src/infiniop/ops/random_sample/
nvidi
a/random_sample_
nvidi
a.cu
View file @
ceb57c2a
#
include
"../../../devices/
cuda/cud
a_handle.cuh"
#
include
"../../../devices/
nvidia/nvidi
a_handle.cuh"
#include "../info.h"
#include "random_sample_cuda.cuh"
#include "random_sample_kernel.cuh"
#include "random_sample_nvidia.cuh"
namespace
op
::
random_sample
::
cud
a
{
namespace
op
::
random_sample
::
nvidi
a
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cud
a
::
Handle
::
Internal
>
internal
;
std
::
shared_ptr
<
device
::
nvidi
a
::
Handle
::
Internal
>
internal
;
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -18,7 +18,7 @@ infiniStatus_t Descriptor::create(
Descriptor
**
desc_ptr
,
infiniopTensorDescriptor_t
result_desc
,
infiniopTensorDescriptor_t
probs_desc
)
{
auto
handle
=
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle_
);
auto
result
=
RandomSampleInfo
::
create
(
result_desc
,
probs_desc
);
CHECK_RESULT
(
result
);
...
...
@@ -99,4 +99,4 @@ infiniStatus_t Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::random_sample::
cud
a
}
// namespace op::random_sample::
nvidi
a
src/infiniop/ops/random_sample/
cud
a/random_sample_
cud
a.cuh
→
src/infiniop/ops/random_sample/
nvidi
a/random_sample_
nvidi
a.cuh
View file @
ceb57c2a
...
...
@@ -3,6 +3,6 @@
#include "../random_sample.h"
DESCRIPTOR
(
cud
a
)
DESCRIPTOR
(
nvidi
a
)
#endif // __RANDOM_SAMPLE_CUDA_CUH__
src/infiniop/ops/random_sample/operator.cc
View file @
ceb57c2a
...
...
@@ -6,7 +6,7 @@
#include "cpu/random_sample_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "
cud
a/random_sample_
cud
a.cuh"
#include "
nvidi
a/random_sample_
nvidi
a.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/random_sample_metax.h"
...
...
@@ -36,7 +36,7 @@ infiniopCreateRandomSampleDescriptor(
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
@@ -69,7 +69,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
GET
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
);
...
...
@@ -112,7 +112,7 @@ __C infiniStatus_t infiniopRandomSample(
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
src/infiniop/ops/rearrange/
cud
a/rearrange_kernel.cuh
→
src/infiniop/ops/rearrange/
nvidi
a/rearrange_kernel.cuh
View file @
ceb57c2a
#ifndef __REARRANGE_CUDA_KERNEL_H__
#define __REARRANGE_CUDA_KERNEL_H__
#include "../../../devices/
cuda/cud
a_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_common.cuh"
#define ARRAY_TYPE_STRIDE ptrdiff_t
#define ARRAY_TYPE_SIZE size_t
...
...
src/infiniop/ops/rearrange/
cud
a/rearrange_
cud
a.cu
→
src/infiniop/ops/rearrange/
nvidi
a/rearrange_
nvidi
a.cu
View file @
ceb57c2a
#include "../../../devices/
cuda/cud
a_common.cuh"
#include "../../../devices/
cuda/cud
a_kernel_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_kernel_common.cuh"
#include "../../../tensor.h"
#include "rearrange_cuda.cuh"
#include "rearrange_kernel.cuh"
#include "rearrange_nvidia.cuh"
#include <algorithm>
#include <cmath>
#include <memory>
#include <stdint.h>
#include <vector>
namespace
op
::
rearrange
::
cud
a
{
namespace
op
::
rearrange
::
nvidi
a
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cud
a
::
Handle
::
Internal
>
internal
;
std
::
shared_ptr
<
device
::
nvidi
a
::
Handle
::
Internal
>
internal
;
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -49,7 +49,7 @@ infiniStatus_t Descriptor::create(
*
desc_ptr
=
new
Descriptor
(
std
::
move
(
*
meta
),
new
Opaque
{
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle
)
->
internal
()},
new
Opaque
{
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle
)
->
internal
()},
handle
->
device
,
handle
->
device_id
);
return
INFINI_STATUS_SUCCESS
;
}
...
...
@@ -482,4 +482,4 @@ infiniStatus_t Descriptor::calculate(
return
status
;
}
}
// namespace op::rearrange::
cud
a
}
// namespace op::rearrange::
nvidi
a
src/infiniop/ops/rearrange/
cud
a/rearrange_
cud
a.cuh
→
src/infiniop/ops/rearrange/
nvidi
a/rearrange_
nvidi
a.cuh
View file @
ceb57c2a
...
...
@@ -3,6 +3,6 @@
#include "../rearrange.h"
DESCRIPTOR
(
cud
a
)
DESCRIPTOR
(
nvidi
a
)
#endif // __REARRANGE_CUDA_H__
src/infiniop/ops/rearrange/operator.cc
View file @
ceb57c2a
...
...
@@ -10,7 +10,7 @@
#endif
#ifdef ENABLE_NVIDIA_API
#include "
cud
a/rearrange_
cud
a.cuh"
#include "
nvidi
a/rearrange_
nvidi
a.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/rearrange_metax.h"
...
...
@@ -40,7 +40,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
#endif
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
@@ -73,7 +73,7 @@ __C infiniStatus_t infiniopRearrange(
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
@@ -104,7 +104,7 @@ __C infiniStatus_t infiniopDestroyRearrangeDescriptor(
#endif
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cud
a
);
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidi
a
);
#endif
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
metax
);
...
...
src/infiniop/ops/relu/cpu/relu_cpu.h
View file @
ceb57c2a
...
...
@@ -5,7 +5,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR
(
relu
,
cpu
,
cpu
)
ELEMENTWISE_DESCRIPTOR
(
relu
,
cpu
)
namespace
op
::
relu
::
cpu
{
typedef
struct
ReluOp
{
...
...
src/infiniop/ops/rms_norm/nvidia/rms_norm_nvidia.cu
View file @
ceb57c2a
#include "../../../devices/
cuda/cud
a_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_common.cuh"
#include "rms_norm_nvidia.cuh"
#include "../../../devices/
cuda/cud
a_kernel_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_kernel_common.cuh"
#include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh"
...
...
@@ -23,7 +23,7 @@ INFINIOP_CUDA_KERNEL rmsnormKernel(
namespace
op
::
rms_norm
::
nvidia
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cud
a
::
Handle
::
Internal
>
internal
;
std
::
shared_ptr
<
device
::
nvidi
a
::
Handle
::
Internal
>
internal
;
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create(
}
*
desc_ptr
=
new
Descriptor
(
new
Opaque
{
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle
)
->
internal
()},
new
Opaque
{
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle
)
->
internal
()},
std
::
move
(
info
),
0
,
handle
->
device
,
handle
->
device_id
);
...
...
src/infiniop/ops/rope/nvidia/rope_nvidia.cu
View file @
ceb57c2a
#include "../../../devices/
cuda/cud
a_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_common.cuh"
#include "rope_nvidia.cuh"
#include "../../../devices/
cuda/cud
a_kernel_common.cuh"
#include "../../../devices/
nvidia/nvidi
a_kernel_common.cuh"
#include "../cuda/kernel.cuh"
...
...
@@ -28,7 +28,7 @@ INFINIOP_CUDA_KERNEL ropeThreadPerItemKernel(
namespace
op
::
rope
::
nvidia
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
cud
a
::
Handle
::
Internal
>
internal
;
std
::
shared_ptr
<
device
::
nvidi
a
::
Handle
::
Internal
>
internal
;
};
Descriptor
::~
Descriptor
()
{
...
...
@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
sin_desc
,
infiniopTensorDescriptor_t
cos_desc
)
{
auto
handle
=
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle_
);
auto
info
=
RoPEInfo
::
createRoPEInfo
(
y_desc
,
x_desc
,
pos_desc
,
sin_desc
,
cos_desc
);
CHECK_RESULT
(
info
);
...
...
@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create(
*
desc_ptr
=
new
Descriptor
(
info
.
take
(),
0
,
new
Opaque
{
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle
)
->
internal
()},
new
Opaque
{
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle
)
->
internal
()},
handle
->
device
,
handle
->
device_id
);
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment