Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
0166515c
Unverified
Commit
0166515c
authored
Aug 07, 2025
by
PanZezhong1725
Committed by
GitHub
Aug 07, 2025
Browse files
Merge branch 'main' into issue/300
parents
f0300ff3
a23c4d13
Changes
175
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
423 additions
and
192 deletions
+423
-192
src/infiniop/ops/sub/metax/sub_metax.maca
src/infiniop/ops/sub/metax/sub_metax.maca
+61
-0
src/infiniop/ops/sub/nvidia/sub_nvidia.cu
src/infiniop/ops/sub/nvidia/sub_nvidia.cu
+13
-9
src/infiniop/ops/sub/nvidia/sub_nvidia.cuh
src/infiniop/ops/sub/nvidia/sub_nvidia.cuh
+8
-0
src/infiniop/ops/sub/operator.cc
src/infiniop/ops/sub/operator.cc
+39
-12
src/infiniop/ops/swiglu/cuda/kernel.cuh
src/infiniop/ops/swiglu/cuda/kernel.cuh
+6
-10
src/infiniop/ops/swiglu/maca/swiglu_maca.h
src/infiniop/ops/swiglu/maca/swiglu_maca.h
+0
-8
src/infiniop/ops/swiglu/maca/swiglu_maca_internal.h
src/infiniop/ops/swiglu/maca/swiglu_maca_internal.h
+0
-40
src/infiniop/ops/swiglu/metax/swiglu_metax.h
src/infiniop/ops/swiglu/metax/swiglu_metax.h
+8
-0
src/infiniop/ops/swiglu/metax/swiglu_metax.maca
src/infiniop/ops/swiglu/metax/swiglu_metax.maca
+16
-11
src/infiniop/ops/swiglu/nvidia/swiglu_nvidia.cu
src/infiniop/ops/swiglu/nvidia/swiglu_nvidia.cu
+12
-9
src/infiniop/ops/swiglu/nvidia/swiglu_nvidia.cuh
src/infiniop/ops/swiglu/nvidia/swiglu_nvidia.cuh
+8
-0
src/infiniop/ops/swiglu/operator.cc
src/infiniop/ops/swiglu/operator.cc
+31
-24
src/infiniop/reduce/cuda/reduce.cuh
src/infiniop/reduce/cuda/reduce.cuh
+5
-2
src/infiniop/reduce/maca/reduce.h
src/infiniop/reduce/maca/reduce.h
+0
-63
src/infinirt-test/main.cc
src/infinirt-test/main.cc
+104
-0
src/infinirt-test/test.cc
src/infinirt-test/test.cc
+93
-0
src/infinirt-test/test.h
src/infinirt-test/test.h
+8
-0
src/infinirt/cuda/infinirt_cuda.cu
src/infinirt/cuda/infinirt_cuda.cu
+1
-1
src/infinirt/cuda/infinirt_cuda.cuh
src/infinirt/cuda/infinirt_cuda.cuh
+1
-1
src/infinirt/infinirt.cc
src/infinirt/infinirt.cc
+9
-2
No files found.
src/infiniop/ops/sub/metax/sub_metax.maca
0 → 100644
View file @
0166515c
#include "../../../elementwise/metax/elementwise_metax.h"
#include "../cuda/kernel.cuh"
#include "sub_metax.h"
namespace op::sub::metax {
// Default destructor: owned members clean themselves up.
Descriptor::~Descriptor() = default;
// Builds a METAX descriptor for element-wise subtraction (out = a - b).
// Validates dtype (F16/F32/F64/BF16) and requires all three shapes to match,
// then delegates descriptor construction to the elementwise METAX machinery.
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
auto dtype = out_desc->dtype();
// Exactly two inputs are expected: a (minuend) and b (subtrahend).
const auto &a_desc = input_desc_vec.at(0);
const auto &b_desc = input_desc_vec.at(1);
const auto &c_shape = out_desc->shape();
const auto &a_shape = a_desc->shape();
const auto &b_shape = b_desc->shape();
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
// create METAX elementwise descriptor
CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
// Launches the subtraction kernel on `stream`, dispatching on the dtype
// recorded at create time. Block size is fixed at 256 threads.
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *output,
std::vector<const void *> inputs,
void *stream) const {
// Caller must provide at least the workspace size reported by the descriptor.
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, cuda::SubOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, cuda::SubOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, cuda::SubOp, double>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, cuda::SubOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
default:
// create() already filtered dtypes, so this is defensive only.
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
return INFINI_STATUS_SUCCESS;
}
} // namespace op::sub::metax
src/infiniop/ops/
mul/cuda/mul_cud
a.cu
→
src/infiniop/ops/
sub/nvidia/sub_nvidi
a.cu
View file @
0166515c
#include "mul_cuda.cuh"
#include "mul_cuda_internal.cuh"
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
namespace
op
::
mul
::
cuda
{
#include "../cuda/kernel.cuh"
#include "sub_nvidia.cuh"
namespace
op
::
sub
::
nvidia
{
Descriptor
::~
Descriptor
()
=
default
;
...
...
@@ -11,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
out_desc
,
std
::
vector
<
infiniopTensorDescriptor_t
>
input_desc_vec
)
{
auto
handle
=
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle_
);
auto
dtype
=
out_desc
->
dtype
();
const
auto
&
a_desc
=
input_desc_vec
.
at
(
0
);
...
...
@@ -20,7 +22,7 @@ infiniStatus_t Descriptor::create(
const
auto
&
a_shape
=
a_desc
->
shape
();
const
auto
&
b_shape
=
b_desc
->
shape
();
CHECK_DTYPE
(
dtype
,
INFINI_DTYPE_F16
,
INFINI_DTYPE_F32
,
INFINI_DTYPE_F64
);
CHECK_DTYPE
(
dtype
,
INFINI_DTYPE_F16
,
INFINI_DTYPE_F32
,
INFINI_DTYPE_F64
,
INFINI_DTYPE_BF16
);
CHECK_SAME_SHAPE
(
c_shape
,
a_shape
,
b_shape
);
...
...
@@ -43,15 +45,17 @@ infiniStatus_t Descriptor::calculate(
switch
(
_dtype
)
{
case
INFINI_DTYPE_F16
:
return
_device_info
->
calculate
<
256
,
Mul
Op
,
half
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
Sub
Op
,
half
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_F32
:
return
_device_info
->
calculate
<
256
,
Mul
Op
,
float
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
Sub
Op
,
float
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_F64
:
return
_device_info
->
calculate
<
256
,
MulOp
,
double
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
SubOp
,
double
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_BF16
:
return
_device_info
->
calculate
<
256
,
cuda
::
SubOp
,
cuda_bfloat16
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
default:
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::
mul::cud
a
}
// namespace op::
sub::nvidi
a
src/infiniop/ops/sub/
cud
a/sub_
cud
a.cuh
→
src/infiniop/ops/sub/
nvidi
a/sub_
nvidi
a.cuh
View file @
0166515c
#ifndef __SUB_CUDA_API_H__
#define __SUB_CUDA_API_H__
#include "../../../elementwise/
cud
a/elementwise_
cud
a_api.cuh"
#include "../../../elementwise/
nvidi
a/elementwise_
nvidi
a_api.cuh"
ELEMENTWISE_DESCRIPTOR
(
sub
,
cud
a
)
ELEMENTWISE_DESCRIPTOR
(
sub
,
nvidi
a
)
#endif // __SUB_CUDA_API_H__
src/infiniop/ops/sub/operator.cc
View file @
0166515c
...
...
@@ -5,8 +5,11 @@
#ifdef ENABLE_CPU_API
#include "cpu/sub_cpu.h"
#endif
#ifdef ENABLE_CUDA_API
#include "cuda/sub_cuda.cuh"
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/sub_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/sub_metax.h"
#endif
__C
infiniStatus_t
infiniopCreateSubDescriptor
(
...
...
@@ -30,8 +33,14 @@ __C infiniStatus_t infiniopCreateSubDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
default:
...
...
@@ -46,14 +55,20 @@ __C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, siz
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<op::sub::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
return INFINI_STATUS_SUCCESS
;
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_CUDA_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#ifdef ENABLE_ILUVATAR_API
GET
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
);
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -82,8 +97,14 @@ __C infiniStatus_t infiniopSub(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
default:
...
...
@@ -106,8 +127,14 @@ infiniopDestroySubDescriptor(infiniopSubDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
DELETE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
default:
...
...
src/infiniop/ops/swiglu/cuda/
swiglu_cuda_int
ern
a
l.cuh
→
src/infiniop/ops/swiglu/cuda/
k
ern
e
l.cuh
View file @
0166515c
#ifndef __SWIGLU_CUDA_H__
#define __SWIGLU_CUDA_H__
#include "../../../elementwise/cuda/elementwise_cuda.cuh"
#include <cuda_bf16.h>
#include <cuda_fp16.h>
namespace
op
::
swiglu
::
cuda
{
typedef
struct
SwiGLUOp
{
private:
...
...
@@ -14,13 +10,13 @@ private:
return
h2rcp
(
__hadd2
(
make_half2
(
1
,
1
),
h2exp
(
__hneg2
(
x
))));
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
half
>
)
{
return
hrcp
(
__hadd
(
half
(
1.
f
),
__float2half
(
__expf
(
__half2float
(
__hneg
(
x
))))));
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
__nv
_bfloat162
>
)
{
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
cuda
_bfloat162
>
)
{
float
x0
=
__bfloat162float
(
__low2bfloat16
(
x
));
float
x1
=
__bfloat162float
(
__high2bfloat16
(
x
));
float
sig0
=
__frcp_rn
(
__fadd_rn
(
1.0
f
,
__expf
(
-
x0
)));
float
sig1
=
__frcp_rn
(
__fadd_rn
(
1.0
f
,
__expf
(
-
x1
)));
return
__floats2bfloat162_rn
(
sig0
,
sig1
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
__nv
_bfloat16
>
)
{
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
cuda
_bfloat16
>
)
{
float
xf
=
__bfloat162float
(
x
);
return
__float2bfloat16_rn
(
__frcp_rn
(
__fadd_rn
(
1.0
f
,
__expf
(
-
xf
))));
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
float
>
)
{
...
...
@@ -38,8 +34,8 @@ public:
return
__hmul2
(
__hmul2
(
gate
,
sigmoid
(
gate
)),
up
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
half
>
)
{
return
__hmul
(
__hmul
(
gate
,
sigmoid
(
gate
)),
up
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
__nv
_bfloat162
>
)
{
__nv
_bfloat162
sig
=
sigmoid
(
gate
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
cuda
_bfloat162
>
)
{
cuda
_bfloat162
sig
=
sigmoid
(
gate
);
float
gate0
=
__bfloat162float
(
__low2bfloat16
(
gate
));
float
gate1
=
__bfloat162float
(
__high2bfloat16
(
gate
));
float
sig0
=
__bfloat162float
(
__low2bfloat16
(
sig
));
...
...
@@ -49,8 +45,8 @@ public:
float
res0
=
__fmul_rn
(
__fmul_rn
(
gate0
,
sig0
),
up0
);
float
res1
=
__fmul_rn
(
__fmul_rn
(
gate1
,
sig1
),
up1
);
return
__floats2bfloat162_rn
(
res0
,
res1
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
__nv
_bfloat16
>
)
{
__nv
_bfloat16
sig
=
sigmoid
(
gate
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
cuda
_bfloat16
>
)
{
cuda
_bfloat16
sig
=
sigmoid
(
gate
);
float
gatef
=
__bfloat162float
(
gate
);
float
sigf
=
__bfloat162float
(
sig
);
float
upf
=
__bfloat162float
(
up
);
...
...
src/infiniop/ops/swiglu/maca/swiglu_maca.h
deleted
100644 → 0
View file @
f0300ff3
#ifndef __SWIGLU_MACA_API_H__
#define __SWIGLU_MACA_API_H__
#include "../../../elementwise/maca/elementwise_maca_api.h"
ELEMENTWISE_DESCRIPTOR
(
swiglu
,
maca
)
#endif // __SWIGLU_MACA_API_H__
src/infiniop/ops/swiglu/maca/swiglu_maca_internal.h
deleted
100644 → 0
View file @
f0300ff3
#ifndef __SWIGLU_MACA_H__
#define __SWIGLU_MACA_H__

#include "../../../elementwise/maca/elementwise_maca.h"
#include <hctlass/half.h>

namespace op::swiglu::maca {

// Element-wise SwiGLU functor: out = gate * sigmoid(gate) * up.
// Each branch picks the fastest intrinsic available for the element type.
typedef struct SwiGLUOp {
private:
    // Logistic sigmoid, specialized per type via `if constexpr`.
    template <typename T>
    __device__ __forceinline__ T sigmoid(const T &x) const {
        if constexpr (std::is_same_v<T, half2>) {
            // Vectorized half2 path: 1 / (1 + exp(-x)) lane-wise.
            return h2rcp(__hadd2(make_half2(1, 1), h2exp(__hneg2(x))));
        } else if constexpr (std::is_same_v<T, half>) {
            // Scalar half: exp computed in float for accuracy, then narrowed.
            return hrcp(__hadd(half(1.f), __float2half(__expf(__half2float(__hneg(x))))));
        } else if constexpr (std::is_same_v<T, float>) {
            return __frcp_rn(__fadd_rn(1, __expf(-x)));
        } else {
            // Generic fallback (e.g. double) via the standard library.
            return 1 / (1 + std::exp(-x));
        }
    }

public:
    // SwiGLU consumes two tensors: up (index 0) and gate (index 1).
    static constexpr size_t num_inputs = 2;

    // Applies gate * sigmoid(gate) * up for a single element (or half2 pair).
    template <typename T>
    __device__ __forceinline__ T operator()(const T &up, const T &gate) const {
        if constexpr (std::is_same_v<T, half2>) {
            return __hmul2(__hmul2(gate, sigmoid(gate)), up);
        } else if constexpr (std::is_same_v<T, half>) {
            return __hmul(__hmul(gate, sigmoid(gate)), up);
        } else if constexpr (std::is_same_v<T, float>) {
            return __fmul_rn(__fmul_rn(gate, sigmoid(gate)), up);
        } else {
            return gate * sigmoid(gate) * up;
        }
    }
} SwiGLUOp;

} // namespace op::swiglu::maca

#endif
src/infiniop/ops/swiglu/metax/swiglu_metax.h
0 → 100644
View file @
0166515c
#ifndef __SWIGLU_METAX_API_H__
#define __SWIGLU_METAX_API_H__
#include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR
(
swiglu
,
metax
)
#endif // __SWIGLU_METAX_API_H__
src/infiniop/ops/swiglu/m
aca
/swiglu_m
aca
.maca
→
src/infiniop/ops/swiglu/m
etax
/swiglu_m
etax
.maca
View file @
0166515c
#include "swiglu_maca.h"
#include "swiglu_maca_internal.h"
#include "swiglu_metax.h"
namespace op::swiglu::maca {
#include "../../../elementwise/metax/elementwise_metax.h"
#include "../cuda/kernel.cuh"
namespace op::swiglu::metax {
Descriptor::~Descriptor() = default;
...
...
@@ -11,7 +14,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::m
aca
::Handle *>(handle_);
auto handle = reinterpret_cast<device::m
etax
::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &up_desc = input_desc_vec.at(0);
...
...
@@ -20,11 +23,11 @@ infiniStatus_t Descriptor::create(
const auto &up_shape = up_desc->shape();
const auto &gate_shape = gate_desc->shape();
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
CHECK_DTYPE(dtype, INFINI_DTYPE_F16,
INFINI_DTYPE_BF16,
INFINI_DTYPE_F32, INFINI_DTYPE_F64);
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create M
ACA
elementwise descriptor
CREATE_ELEMENTWISE_M
ACA
_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
// create M
ETAX
elementwise descriptor
CREATE_ELEMENTWISE_M
ETAX
_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
...
...
@@ -42,15 +45,17 @@ infiniStatus_t Descriptor::calculate(
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, SwiGLUOp, half>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256, cuda::SwiGLUOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, cuda::SwiGLUOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, SwiGLUOp, float>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256,
cuda::
SwiGLUOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, SwiGLUOp, double>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256,
cuda::
SwiGLUOp, double>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
return INFINI_STATUS_SUCCESS;
}
} // namespace op::swiglu::m
aca
} // namespace op::swiglu::m
etax
src/infiniop/ops/swiglu/
cud
a/swiglu_
cud
a.cu
→
src/infiniop/ops/swiglu/
nvidi
a/swiglu_
nvidi
a.cu
View file @
0166515c
#include "swiglu_cuda.cuh"
#include "swiglu_cuda_internal.cuh"
#include "swiglu_nvidia.cuh"
namespace
op
::
swiglu
::
cuda
{
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
namespace
op
::
swiglu
::
nvidia
{
Descriptor
::~
Descriptor
()
=
default
;
...
...
@@ -11,7 +14,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
out_desc
,
std
::
vector
<
infiniopTensorDescriptor_t
>
input_desc_vec
)
{
auto
handle
=
reinterpret_cast
<
device
::
cud
a
::
Handle
*>
(
handle_
);
auto
handle
=
reinterpret_cast
<
device
::
nvidi
a
::
Handle
*>
(
handle_
);
auto
dtype
=
out_desc
->
dtype
();
const
auto
&
up_desc
=
input_desc_vec
.
at
(
0
);
...
...
@@ -42,17 +45,17 @@ infiniStatus_t Descriptor::calculate(
switch
(
_dtype
)
{
case
INFINI_DTYPE_F16
:
return
_device_info
->
calculate
<
256
,
SwiGLUOp
,
half
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
SwiGLUOp
,
half
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_BF16
:
return
_device_info
->
calculate
<
256
,
SwiGLUOp
,
__nv
_bfloat16
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
SwiGLUOp
,
cuda
_bfloat16
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_F32
:
return
_device_info
->
calculate
<
256
,
SwiGLUOp
,
float
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
SwiGLUOp
,
float
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
case
INFINI_DTYPE_F64
:
return
_device_info
->
calculate
<
256
,
SwiGLUOp
,
double
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
return
_device_info
->
calculate
<
256
,
cuda
::
SwiGLUOp
,
double
>
(
_info
,
workspace
,
output
,
inputs
,
stream
);
default:
return
INFINI_STATUS_BAD_TENSOR_DTYPE
;
}
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::swiglu::
cud
a
}
// namespace op::swiglu::
nvidi
a
src/infiniop/ops/swiglu/
cud
a/swiglu_
cud
a.cuh
→
src/infiniop/ops/swiglu/
nvidi
a/swiglu_
nvidi
a.cuh
View file @
0166515c
#ifndef __SWIGLU_CUDA_API_H__
#define __SWIGLU_CUDA_API_H__
#include "../../../elementwise/
cud
a/elementwise_
cud
a_api.cuh"
#include "../../../elementwise/
nvidi
a/elementwise_
nvidi
a_api.cuh"
ELEMENTWISE_DESCRIPTOR
(
swiglu
,
cud
a
)
ELEMENTWISE_DESCRIPTOR
(
swiglu
,
nvidi
a
)
#endif // __SWIGLU_CUDA_API_H__
src/infiniop/ops/swiglu/operator.cc
View file @
0166515c
...
...
@@ -5,14 +5,14 @@
#ifdef ENABLE_CPU_API
#include "cpu/swiglu_cpu.h"
#endif
#ifdef
ENABLE_CUDA
_API
#include "
cud
a/swiglu_
cud
a.cuh"
#if
def
ined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR
_API
)
#include "
nvidi
a/swiglu_
nvidi
a.cuh"
#endif
#ifdef ENABLE_KUNLUN_API
#include "kunlun/swiglu_kunlun.h"
#endif
#ifdef ENABLE_METAX_API
#include "m
aca
/swiglu_m
aca
.h"
#include "m
etax
/swiglu_m
etax
.h"
#endif
#ifdef ENABLE_ASCEND_API
#include "ascend/swiglu_ascend.h"
...
...
@@ -39,14 +39,17 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor(
#ifdef ENABLE_CPU_API
CREATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_KUNLUN_API
CREATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
m
aca
);
CREATE
(
INFINI_DEVICE_METAX
,
m
etax
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
...
...
@@ -83,20 +86,23 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<op::swiglu::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
return INFINI_STATUS_SUCCESS
;
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
#ifdef ENABLE_CPU_API
GET
(
INFINI_DEVICE_CPU
,
cpu
)
GET
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_NVIDIA_API
GET
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_
CUDA
_API
GET
(
INFINI_DEVICE_
NVIDIA
,
cud
a
)
#ifdef ENABLE_
ILUVATAR
_API
GET
(
INFINI_DEVICE_
ILUVATAR
,
nvidi
a
)
;
#endif
#ifdef ENABLE_KUNLUN_API
GET
(
INFINI_DEVICE_KUNLUN
,
kunlun
)
GET
(
INFINI_DEVICE_KUNLUN
,
kunlun
)
;
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
m
aca
);
GET
(
INFINI_DEVICE_METAX
,
m
etax
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
...
...
@@ -104,12 +110,7 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
}
#endif
#ifdef ENABLE_ASCEND_API
GET
(
INFINI_DEVICE_ASCEND
,
ascend
)
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetSwiGLUWorkspaceSize
((
SwiGLUMacaDescriptor_t
)
desc
,
size
);
}
GET
(
INFINI_DEVICE_ASCEND
,
ascend
);
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
...
...
@@ -142,14 +143,17 @@ __C infiniStatus_t infiniopSwiGLU(
#ifdef ENABLE_CPU_API
CALCULATE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_KUNLUN_API
CALCULATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
m
aca
);
CALCULATE
(
INFINI_DEVICE_METAX
,
m
etax
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
...
...
@@ -188,14 +192,17 @@ infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) {
#ifdef ENABLE_CPU_API
DELETE
(
INFINI_DEVICE_CPU
,
cpu
);
#endif
#ifdef ENABLE_CUDA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#ifdef ENABLE_NVIDIA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
nvidia
);
#endif
#ifdef ENABLE_ILUVATAR_API
DELETE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
);
#endif
#ifdef ENABLE_KUNLUN_API
DELETE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
m
aca
);
DELETE
(
INFINI_DEVICE_METAX
,
m
etax
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
...
...
src/infiniop/reduce/cuda/reduce.cuh
View file @
0166515c
#ifndef __INFINIOP_REDUCE_CUDA_H__
#define __INFINIOP_REDUCE_CUDA_H__
#include <cub/block/block_reduce.cuh>
/*
* Device functions for reduction operations on CUDA.
*
* Note: Only local result on thread 0 is guaranteed to be correct.
* A manual broadcast is needed for other threads.
*
* Important Note: This is a device-independent header file containing reduce kernels
* for all cuda-supporting platforms. Include device-specific headers
* (such as <cub/block/block_reduce.cuh> for nvidia) in your source file
* and then include this file for proper usage.
*/
namespace
op
::
common_cuda
::
reduce_op
{
...
...
src/infiniop/reduce/maca/reduce.h
deleted
100644 → 0
View file @
f0300ff3
#ifndef __INFINIOP_REDUCE_MACA_H__
#define __INFINIOP_REDUCE_MACA_H__

#include <hccub/block/block_reduce.cuh>

/*
 * Device functions for reduction operations on MACA.
 *
 * Note: Only the local result on thread 0 is guaranteed to be correct.
 * A manual broadcast is needed for other threads.
 */

namespace op::common_maca::reduce_op {

// Sum(x^2) on contiguous data of length count.
template <unsigned int BLOCK_SIZE, typename Tdata, typename Tcompute>
__device__ __forceinline__ Tcompute sumSquared(const Tdata *data_ptr, size_t count) {
    // Each thread accumulates a strided partial sum of squares.
    Tcompute partial = 0;
    for (size_t idx = threadIdx.x; idx < count; idx += BLOCK_SIZE) {
        partial += Tcompute(data_ptr[idx]) * Tcompute(data_ptr[idx]);
    }
    // Combine the partials with a CUB block-level reduction.
    using BlockReduce = cub::BlockReduce<Tcompute, BLOCK_SIZE>;
    __shared__ typename BlockReduce::TempStorage temp_storage;
    return BlockReduce(temp_storage).Sum(partial);
}

// Sum(x) on contiguous data of length count.
template <unsigned int BLOCK_SIZE, typename Tdata, typename Tcompute>
__device__ __forceinline__ Tcompute sum(const Tdata *data_ptr, size_t count) {
    Tcompute partial = 0;
    for (size_t idx = threadIdx.x; idx < count; idx += BLOCK_SIZE) {
        partial += Tcompute(data_ptr[idx]);
    }
    using BlockReduce = cub::BlockReduce<Tcompute, BLOCK_SIZE>;
    __shared__ typename BlockReduce::TempStorage temp_storage;
    return BlockReduce(temp_storage).Sum(partial);
}

// Max(x) on contiguous data of length count.
// NOTE(review): reads data_ptr[0] unconditionally — assumes count >= 1.
template <unsigned int BLOCK_SIZE, typename Tdata>
__device__ __forceinline__ Tdata max(const Tdata *data_ptr, size_t count) {
    Tdata running_max = data_ptr[0];
    for (size_t idx = threadIdx.x; idx < count; idx += BLOCK_SIZE) {
        running_max = cub::Max()(running_max, data_ptr[idx]);
    }
    using BlockReduce = cub::BlockReduce<Tdata, BLOCK_SIZE>;
    __shared__ typename BlockReduce::TempStorage temp_storage;
    return BlockReduce(temp_storage).Reduce(running_max, cub::Max(), BLOCK_SIZE);
}

} // namespace op::common_maca::reduce_op

#endif
src/infinirt-test/main.cc
0 → 100644
View file @
0166515c
#include "test.h"
#include <infinirt.h>
// Parsed command-line options for infinirt-test.
struct ParsedArgs {
    // Target backend; defaults to CPU when no flag is given.
    infiniDevice_t device_type = INFINI_DEVICE_CPU;
};
// Prints the CLI usage text and terminates the process with a failure code.
void printUsage() {
    std::cout << "Usage:" << std::endl
              << " infinirt-test [--<device>]" << std::endl
              << std::endl
              << "Options:" << std::endl
              << " --<device> Specify the device type." << std::endl
              << std::endl
              << "Available devices:" << std::endl
              << " cpu - Default" << std::endl
              << " nvidia" << std::endl
              << " cambricon" << std::endl
              << " ascend" << std::endl
              << " metax" << std::endl
              << " moore" << std::endl
              << " iluvatar" << std::endl
              << " kunlun" << std::endl
              << " sugon" << std::endl
              << std::endl;
    exit(EXIT_FAILURE);
}
// Parses argv into ParsedArgs. Recognizes one "--<device>" flag (or
// --help/-h); any unknown flag prints usage and exits.
ParsedArgs parseArgs(int argc, char *argv[]) {
    ParsedArgs args;
    if (argc < 2) {
        return args; // no flag given: fall back to the CPU default
    }
    std::string arg = argv[1];
    if (arg == "--help" || arg == "-h") {
        printUsage();
    }
    try {
// Maps a flag string onto the corresponding device enum; chained with
// `else` below to form a single if/else-if cascade.
#define PARSE_DEVICE(FLAG, DEVICE) \
    if (arg == FLAG) {             \
        args.device_type = DEVICE; \
    }
        // clang-format off
        PARSE_DEVICE("--cpu", INFINI_DEVICE_CPU)
        else PARSE_DEVICE("--nvidia", INFINI_DEVICE_NVIDIA)
        else PARSE_DEVICE("--cambricon", INFINI_DEVICE_CAMBRICON)
        else PARSE_DEVICE("--ascend", INFINI_DEVICE_ASCEND)
        else PARSE_DEVICE("--metax", INFINI_DEVICE_METAX)
        else PARSE_DEVICE("--moore", INFINI_DEVICE_MOORE)
        else PARSE_DEVICE("--iluvatar", INFINI_DEVICE_ILUVATAR)
        else PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
        else PARSE_DEVICE("--sugon", INFINI_DEVICE_SUGON)
        else {
            printUsage();
        }
        // clang-format on
#undef PARSE_DEVICE
    } catch (const std::exception &) {
        printUsage();
    }
    return args;
}
// Entry point: picks the device type from argv, enumerates its devices,
// and runs set-device + memcpy round-trip tests on each one.
int main(int argc, char *argv[]) {
    ParsedArgs args = parseArgs(argc, argv);
    std::cout << "Testing Device: " << args.device_type << std::endl;
    infiniDevice_t device = args.device_type;

    // Query the per-type device counts in one call.
    std::vector<int> deviceCounts(INFINI_DEVICE_TYPE_COUNT, 0);
    if (infinirtGetAllDeviceCount(deviceCounts.data()) != INFINI_STATUS_SUCCESS) {
        std::cerr << "Failed to get total device count." << std::endl;
        return 1;
    }

    int numDevices = deviceCounts[device];
    std::cout << "Device Type: " << device << " | Available Devices: " << numDevices << std::endl;
    if (numDevices == 0) {
        std::cout << "Device type " << device << " has no available devices." << std::endl;
        return 0;
    }

    // Exercise each physical device with a range of transfer sizes
    // (1 KiB up to 1 GiB of elements); stop at the first failure.
    for (int deviceId = 0; deviceId < numDevices; ++deviceId) {
        if (!testSetDevice(device, deviceId)) {
            return 1;
        }
        size_t dataSize[] = {1 << 10, 4 << 10, 2 << 20, 1L << 30};
        for (size_t size : dataSize) {
            if (!testMemcpy(device, deviceId, size)) {
                return 1;
            }
        }
    }
    return 0;
}
src/infinirt-test/test.cc
0 → 100644
View file @
0166515c
#include "test.h"
#include <cstring>
#include <infinirt.h>
#include <iostream>
// Round-trips `dataSize` floats host -> device -> device -> host on the
// given device and verifies the payload survives unchanged.
// Returns true on success; frees any device buffers it allocated on every
// exit path. (`device` is currently only implied by the prior set-device
// call — the parameter itself is unused here.)
bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize) {
    std::cout << "==============================================\n"
              << "Testing memcpy on Device ID: " << deviceId << "\n"
              << "==============================================" << std::endl;

    // Host-side source pattern and destination scratch buffer.
    std::cout << "[Device " << deviceId << "] Allocating host memory: "
              << dataSize * sizeof(float) << " bytes" << std::endl;
    std::vector<float> hostData(dataSize, 1.23f);
    std::vector<float> hostCopy(dataSize, 0.0f);

    // Two device buffers: src receives H2D, dst receives D2D.
    void *deviceSrc = nullptr, *deviceDst = nullptr;
    size_t dataSizeInBytes = dataSize * sizeof(float);
    std::cout << "[Device " << deviceId << "] Allocating device memory: "
              << dataSizeInBytes << " bytes" << std::endl;
    if (infinirtMalloc(&deviceSrc, dataSizeInBytes) != INFINI_STATUS_SUCCESS) {
        std::cerr << "[Device " << deviceId << "] Failed to allocate device memory for deviceSrc." << std::endl;
        return false;
    }
    if (infinirtMalloc(&deviceDst, dataSizeInBytes) != INFINI_STATUS_SUCCESS) {
        std::cerr << "[Device " << deviceId << "] Failed to allocate device memory for deviceDst." << std::endl;
        infinirtFree(deviceSrc);
        return false;
    }

    // Host -> device.
    std::cout << "[Device " << deviceId << "] Copying data from host to device..." << std::endl;
    if (infinirtMemcpy(deviceSrc, hostData.data(), dataSizeInBytes, INFINIRT_MEMCPY_H2D) != INFINI_STATUS_SUCCESS) {
        std::cerr << "[Device " << deviceId << "] Failed to copy data from host to device." << std::endl;
        infinirtFree(deviceSrc);
        infinirtFree(deviceDst);
        return false;
    }

    // Device -> device.
    std::cout << "[Device " << deviceId << "] Copying data between device memory (D2D)..." << std::endl;
    if (infinirtMemcpy(deviceDst, deviceSrc, dataSizeInBytes, INFINIRT_MEMCPY_D2D) != INFINI_STATUS_SUCCESS) {
        std::cerr << "[Device " << deviceId << "] Failed to copy data from device to device." << std::endl;
        infinirtFree(deviceSrc);
        infinirtFree(deviceDst);
        return false;
    }

    // Device -> host.
    std::cout << "[Device " << deviceId << "] Copying data from device back to host..." << std::endl;
    if (infinirtMemcpy(hostCopy.data(), deviceDst, dataSizeInBytes, INFINIRT_MEMCPY_D2H) != INFINI_STATUS_SUCCESS) {
        std::cerr << "[Device " << deviceId << "] Failed to copy data from device to host." << std::endl;
        infinirtFree(deviceSrc);
        infinirtFree(deviceDst);
        return false;
    }

    // Byte-for-byte verification of the round trip.
    std::cout << "[Device " << deviceId << "] Validating copied data..." << std::endl;
    if (std::memcmp(hostData.data(), hostCopy.data(), dataSizeInBytes) != 0) {
        std::cerr << "[Device " << deviceId << "] Data mismatch between hostData and hostCopy." << std::endl;
        infinirtFree(deviceSrc);
        infinirtFree(deviceDst);
        return false;
    }
    std::cout << "[Device " << deviceId << "] Data copied correctly!" << std::endl;

    // Success path cleanup.
    std::cout << "[Device " << deviceId << "] Freeing device memory..." << std::endl;
    infinirtFree(deviceSrc);
    infinirtFree(deviceDst);
    std::cout << "[Device " << deviceId << "] Memory copy test PASSED!" << std::endl;
    return true;
}
// Makes (device, deviceId) the active device for subsequent runtime calls.
// Returns false (after logging) if the runtime rejects the selection.
bool testSetDevice(infiniDevice_t device, int deviceId) {
    std::cout << "Setting device " << device << " with ID: " << deviceId << std::endl;
    infiniStatus_t status = infinirtSetDevice(device, deviceId);
    if (status != INFINI_STATUS_SUCCESS) {
        std::cerr << "Failed to set device " << device << " with ID " << deviceId << std::endl;
        return false;
    }
    return true;
}
src/infinirt-test/test.h
0 → 100644
View file @
0166515c
#ifndef __INFINIRT_TEST_H__
#define __INFINIRT_TEST_H__

#include "../utils.h"

// Selects (device, deviceId) as the active runtime device.
// Returns false if the runtime rejects the selection.
bool testSetDevice(infiniDevice_t device, int deviceId);

// Round-trips `dataSize` floats H2D -> D2D -> D2H on the given device and
// verifies the data; returns false on any allocation/copy/verify failure.
bool testMemcpy(infiniDevice_t device, int deviceId, size_t dataSize);

#endif
src/infinirt/cuda/infinirt_cuda.cu
View file @
0166515c
...
...
@@ -38,7 +38,7 @@ infiniStatus_t streamSynchronize(infinirtStream_t stream) {
}
infiniStatus_t
streamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
)
{
#ifdef ENABLE_ILUVATAR_
CUDA_
API
#ifdef ENABLE_ILUVATAR_API
return
INFINI_STATUS_NOT_IMPLEMENTED
;
#else
CHECK_CUDART
(
cudaStreamWaitEvent
((
cudaStream_t
)
stream
,
(
cudaEvent_t
)
event
));
...
...
src/infinirt/cuda/infinirt_cuda.cuh
View file @
0166515c
...
...
@@ -3,7 +3,7 @@
#include "../infinirt_impl.h"
namespace
infinirt
::
cuda
{
#ifdef
ENABLE_CUDA
_API
#if
def
ined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR
_API
)
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
...
...
src/infinirt/infinirt.cc
View file @
0166515c
...
...
@@ -5,7 +5,7 @@
#include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh"
#include "kunlun/infinirt_kunlun.h"
#include "m
aca
/infinirt_m
aca
.h"
#include "m
etax
/infinirt_m
etax
.h"
#include "musa/infinirt_musa.h"
thread_local
infiniDevice_t
CURRENT_DEVICE_TYPE
=
INFINI_DEVICE_CPU
;
...
...
@@ -23,6 +23,10 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return
INFINI_STATUS_NULL_POINTER
;
}
for
(
size_t
i
=
0
;
i
<
INFINI_DEVICE_TYPE_COUNT
;
i
++
)
{
if
(
i
==
INFINI_DEVICE_ILUVATAR
||
i
==
INFINI_DEVICE_KUNLUN
||
i
==
INFINI_DEVICE_SUGON
)
{
count_array
[
i
]
=
0
;
continue
;
}
auto
status
=
infinirtGetDeviceCount
(
static_cast
<
infiniDevice_t
>
(
i
),
&
count_array
[
i
]);
if
(
status
!=
INFINI_STATUS_SUCCESS
)
{
return
status
;
...
...
@@ -62,7 +66,7 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
_status = infinirt::ascend::API PARAMS; \
break; \
case INFINI_DEVICE_METAX: \
_status = infinirt::m
aca
::API PARAMS;
\
_status = infinirt::m
etax
::API PARAMS; \
break; \
case INFINI_DEVICE_MOORE: \
_status = infinirt::musa::API PARAMS; \
...
...
@@ -70,6 +74,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
case INFINI_DEVICE_KUNLUN: \
_status = infinirt::kunlun::API PARAMS; \
break; \
case INFINI_DEVICE_ILUVATAR: \
_status = infinirt::cuda::API PARAMS; \
break; \
default: \
_status = INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
} \
...
...
Prev
1
…
3
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment