Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
20a45b60
Unverified
Commit
20a45b60
authored
Nov 22, 2025
by
gongchensu
Committed by
GitHub
Nov 22, 2025
Browse files
Merge pull request #659 from gongchensu/feature/moore_adapt
Feature/moore adapt
parents
dfd1341e
c87a2ef4
Changes
19
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
484 additions
and
11 deletions
+484
-11
src/infiniop/ops/add/moore/add_moore.h
src/infiniop/ops/add/moore/add_moore.h
+8
-0
src/infiniop/ops/add/moore/add_moore.mu
src/infiniop/ops/add/moore/add_moore.mu
+66
-0
src/infiniop/ops/add/moore/add_moore_kernel.h
src/infiniop/ops/add/moore/add_moore_kernel.h
+38
-0
src/infiniop/ops/add/operator.cc
src/infiniop/ops/add/operator.cc
+15
-0
src/infiniop/ops/mul/moore/mul_moore.h
src/infiniop/ops/mul/moore/mul_moore.h
+8
-0
src/infiniop/ops/mul/moore/mul_moore.mu
src/infiniop/ops/mul/moore/mul_moore.mu
+64
-0
src/infiniop/ops/mul/moore/mul_moore_kernel.h
src/infiniop/ops/mul/moore/mul_moore_kernel.h
+37
-0
src/infiniop/ops/mul/operator.cc
src/infiniop/ops/mul/operator.cc
+15
-0
src/infiniop/ops/silu/moore/silu_moore.h
src/infiniop/ops/silu/moore/silu_moore.h
+8
-0
src/infiniop/ops/silu/moore/silu_moore.mu
src/infiniop/ops/silu/moore/silu_moore.mu
+62
-0
src/infiniop/ops/silu/moore/silu_moore_kernel.h
src/infiniop/ops/silu/moore/silu_moore_kernel.h
+39
-0
src/infiniop/ops/silu/operator.cc
src/infiniop/ops/silu/operator.cc
+15
-0
test/infinicore/ops/adaptive_max_pool2d.py
test/infinicore/ops/adaptive_max_pool2d.py
+19
-1
test/infinicore/ops/isin.py
test/infinicore/ops/isin.py
+6
-2
test/infinicore/ops/mse_loss.py
test/infinicore/ops/mse_loss.py
+8
-3
test/infinicore/ops/pad.py
test/infinicore/ops/pad.py
+7
-3
test/infinicore/ops/sort.py
test/infinicore/ops/sort.py
+33
-1
test/infinicore/ops/std.py
test/infinicore/ops/std.py
+33
-1
test/infiniop/libinfiniop/utils.py
test/infiniop/libinfiniop/utils.py
+3
-0
No files found.
src/infiniop/ops/add/moore/add_moore.h
0 → 100644
View file @
20a45b60
#ifndef __ADD_MOORE_API_H__
#define __ADD_MOORE_API_H__

#include "../../../elementwise/moore/elementwise_moore_api.h"

// Declares the public descriptor type for the Moore (MUSA) backend of the
// element-wise `add` operator via the shared ELEMENTWISE_DESCRIPTOR macro.
ELEMENTWISE_DESCRIPTOR(add, moore)

#endif // __ADD_MOORE_API_H__
src/infiniop/ops/add/moore/add_moore.mu
0 → 100644
View file @
20a45b60
#include "add_moore.h"
#include "../../../elementwise/moore/elementwise_moore.h"
#include "add_moore_kernel.h"
namespace op::add::moore {
// All owned state is released by member destructors; nothing extra to do.
Descriptor::~Descriptor() = default;
// Validates descriptors for element-wise c = a + b on the Moore (MUSA)
// backend and, on success, builds the descriptor into *desc_ptr.
// - handle_: device handle; must actually be a device::moore::Handle
// - desc_ptr: receives the newly created descriptor
// - out_desc: descriptor of the output tensor c
// - input_desc_vec: descriptors of the two inputs {a, b}
// NOTE: the CHECK_* and CREATE_* macros return early with an error status
// on failure, so control only reaches the final return on success.
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::moore::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &a_desc = input_desc_vec.at(0);
const auto &b_desc = input_desc_vec.at(1);
const auto &c_shape = out_desc->shape();
const auto &a_shape = a_desc->shape();
const auto &b_shape = b_desc->shape();
// Element types supported by the Moore add kernel.
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16, INFINI_DTYPE_I32, INFINI_DTYPE_I64);
// Element-wise add requires identical shapes (no broadcasting here).
CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
// create MOORE elementwise descriptor
CREATE_ELEMENTWISE_MOORE_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
// Launches output = inputs[0] + inputs[1] on the given stream, dispatching
// on the dtype captured at create() time. 256 is the thread-block size
// forwarded to the shared element-wise launcher.
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *output,
std::vector<const void *> inputs,
void *stream) const {
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, moore::AddOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, moore::AddOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, moore::AddOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, moore::AddOp, double>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_I32:
return _device_info->calculate<256, moore::AddOp, int32_t>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_I64:
return _device_info->calculate<256, moore::AddOp, int64_t>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
// Unreachable: every switch branch above returns.
return INFINI_STATUS_SUCCESS;
}
} // namespace op::add::moore
src/infiniop/ops/add/moore/add_moore_kernel.h
0 → 100644
View file @
20a45b60
#ifndef __ADD_MOORE_KERNEL_H__
#define __ADD_MOORE_KERNEL_H__

#include <cstddef>
#include <type_traits>

/*
 * Element-wise Add functor for the MUSA (Moore Threads) backend.
 *
 * The code deliberately mirrors the CUDA implementation (same structure and
 * interface) so the backends stay aligned across hardware platforms; only
 * the namespace (op::add::moore) and a few platform-specific intrinsic
 * choices differ.
 */
namespace op::add::moore {

// Plain struct (the original C-style `typedef struct ... AddOp;` and the
// redundant `public:` are unnecessary in C++).
struct AddOp {
    // Binary operator: consumes two input elements per output element.
    static constexpr size_t num_inputs = 2;

    // Returns a + b, dispatched at compile time on the element type.
    template <typename T>
    __device__ __forceinline__ T operator()(const T &a, const T &b) const {
        if constexpr (std::is_same_v<T, half2>) {
            // Vectorized FP16x2 addition.
            return __hadd2(a, b);
        } else if constexpr (std::is_same_v<T, half>) {
            return __hadd(a, b);
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // On the MUSA platform, add in float and convert back to avoid an
            // ambiguous conversion from int (returned by __hadd) to
            // __mt_bfloat16.
            float a_f = __bfloat162float(a);
            float b_f = __bfloat162float(b);
            return __float2bfloat16_rn(a_f + b_f);
        } else if constexpr (std::is_same_v<T, float>) {
            // Use __fadd_rn (round-to-nearest) instead of __fadd_rd for Moore
            // platform compatibility.
            return __fadd_rn(a, b);
        } else {
            // Integral and double element types: plain addition.
            return a + b;
        }
    }
};

} // namespace op::add::moore

#endif // __ADD_MOORE_KERNEL_H__
src/infiniop/ops/add/operator.cc
View file @
20a45b60
...
@@ -17,6 +17,9 @@
...
@@ -17,6 +17,9 @@
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
#include "bang/add_bang.h"
#include "bang/add_bang.h"
#endif
#endif
#ifdef ENABLE_MOORE_API
#include "moore/add_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateAddDescriptor
(
__C
infiniStatus_t
infiniopCreateAddDescriptor
(
infiniopHandle_t
handle
,
infiniopHandle_t
handle
,
...
@@ -57,6 +60,9 @@ __C infiniStatus_t infiniopCreateAddDescriptor(
...
@@ -57,6 +60,9 @@ __C infiniStatus_t infiniopCreateAddDescriptor(
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
CREATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
CREATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CREATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -93,6 +99,9 @@ __C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, siz
...
@@ -93,6 +99,9 @@ __C infiniStatus_t infiniopGetAddWorkspaceSize(infiniopAddDescriptor_t desc, siz
#endif
#endif
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
GET
(
INFINI_DEVICE_CAMBRICON
,
bang
);
GET
(
INFINI_DEVICE_CAMBRICON
,
bang
);
#endif
#ifdef ENABLE_MOORE_API
GET
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -139,6 +148,9 @@ __C infiniStatus_t infiniopAdd(
...
@@ -139,6 +148,9 @@ __C infiniStatus_t infiniopAdd(
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
CALCULATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
CALCULATE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CALCULATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -178,6 +190,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
...
@@ -178,6 +190,9 @@ infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc) {
#ifdef ENABLE_CAMBRICON_API
#ifdef ENABLE_CAMBRICON_API
DELETE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
DELETE
(
INFINI_DEVICE_CAMBRICON
,
bang
);
#endif
#endif
#ifdef ENABLE_MOORE_API
DELETE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/mul/moore/mul_moore.h
0 → 100644
View file @
20a45b60
#ifndef __MUL_MOORE_API_H__
#define __MUL_MOORE_API_H__

#include "../../../elementwise/moore/elementwise_moore_api.h"

// Declares the public descriptor type for the Moore (MUSA) backend of the
// element-wise `mul` operator via the shared ELEMENTWISE_DESCRIPTOR macro.
ELEMENTWISE_DESCRIPTOR(mul, moore)

#endif // __MUL_MOORE_API_H__
src/infiniop/ops/mul/moore/mul_moore.mu
0 → 100644
View file @
20a45b60
#include "mul_moore.h"
#include "../../../elementwise/moore/elementwise_moore.h"
#include "mul_moore_kernel.h"
namespace op::mul::moore {
// All owned state is released by member destructors; nothing extra to do.
Descriptor::~Descriptor() = default;
// Validates descriptors for element-wise c = a * b on the Moore (MUSA)
// backend and, on success, builds the descriptor into *desc_ptr.
// - handle_: device handle; must actually be a device::moore::Handle
// - desc_ptr: receives the newly created descriptor
// - out_desc: descriptor of the output tensor c
// - input_desc_vec: descriptors of the two inputs {a, b}
// NOTE: the CHECK_* and CREATE_* macros return early with an error status
// on failure, so control only reaches the final return on success.
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::moore::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &a_desc = input_desc_vec.at(0);
const auto &b_desc = input_desc_vec.at(1);
const auto &c_shape = out_desc->shape();
const auto &a_shape = a_desc->shape();
const auto &b_shape = b_desc->shape();
// Floating-point types only (unlike add, no integer support here).
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
// Element-wise mul requires identical shapes (no broadcasting here).
CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
// create MOORE elementwise descriptor
CREATE_ELEMENTWISE_MOORE_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
// Launches output = inputs[0] * inputs[1] on the given stream, dispatching
// on the dtype captured at create() time. 256 is the thread-block size
// forwarded to the shared element-wise launcher.
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *output,
std::vector<const void *> inputs,
void *stream) const {
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, moore::MulOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, moore::MulOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, moore::MulOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, moore::MulOp, double>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
// Unreachable: every switch branch above returns.
return INFINI_STATUS_SUCCESS;
}
} // namespace op::mul::moore
src/infiniop/ops/mul/moore/mul_moore_kernel.h
0 → 100644
View file @
20a45b60
#ifndef __MUL_MOORE_KERNEL_H__
#define __MUL_MOORE_KERNEL_H__

#include <cstddef>
#include <type_traits>

/*
 * Element-wise Mul functor for the MUSA (Moore Threads) backend.
 *
 * The code deliberately mirrors the CUDA implementation (same structure and
 * interface) so the backends stay aligned across hardware platforms; only
 * the namespace (op::mul::moore) and a few platform-specific intrinsic
 * choices differ.
 */
namespace op::mul::moore {

// Plain struct (the original C-style `typedef struct ... MulOp;` and the
// redundant `public:` are unnecessary in C++).
struct MulOp {
    // Binary operator: consumes two input elements per output element.
    static constexpr size_t num_inputs = 2;

    // Returns a * b, dispatched at compile time on the element type.
    template <typename T>
    __device__ __forceinline__ T operator()(const T &a, const T &b) const {
        if constexpr (std::is_same_v<T, half2>) {
            // Vectorized FP16x2 multiplication.
            return __hmul2(a, b);
        } else if constexpr (std::is_same_v<T, half>) {
            return __hmul(a, b);
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // On the MUSA platform, multiply in float and convert back to
            // avoid ambiguous intrinsic conversions on __mt_bfloat16.
            float a_f = __bfloat162float(a);
            float b_f = __bfloat162float(b);
            return __float2bfloat16_rn(a_f * b_f);
        } else if constexpr (std::is_same_v<T, float>) {
            // Use __fmul_rn (round-to-nearest) for Moore platform
            // compatibility.
            return __fmul_rn(a, b);
        } else {
            // Remaining element types (e.g. double): plain multiplication.
            return a * b;
        }
    }
};

} // namespace op::mul::moore

#endif // __MUL_MOORE_KERNEL_H__
src/infiniop/ops/mul/operator.cc
View file @
20a45b60
...
@@ -14,6 +14,9 @@
...
@@ -14,6 +14,9 @@
#ifdef ENABLE_KUNLUN_API
#ifdef ENABLE_KUNLUN_API
#include "kunlun/mul_kunlun.h"
#include "kunlun/mul_kunlun.h"
#endif
#endif
#ifdef ENABLE_MOORE_API
#include "moore/mul_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateMulDescriptor
(
__C
infiniStatus_t
infiniopCreateMulDescriptor
(
infiniopHandle_t
handle
,
infiniopHandle_t
handle
,
...
@@ -51,6 +54,9 @@ __C infiniStatus_t infiniopCreateMulDescriptor(
...
@@ -51,6 +54,9 @@ __C infiniStatus_t infiniopCreateMulDescriptor(
#ifdef ENABLE_KUNLUN_API
#ifdef ENABLE_KUNLUN_API
CREATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
CREATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CREATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -85,6 +91,9 @@ __C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, siz
...
@@ -85,6 +91,9 @@ __C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, siz
#ifdef ENABLE_KUNLUN_API
#ifdef ENABLE_KUNLUN_API
GET
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
GET
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#endif
#ifdef ENABLE_MOORE_API
GET
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -128,6 +137,9 @@ __C infiniStatus_t infiniopMul(
...
@@ -128,6 +137,9 @@ __C infiniStatus_t infiniopMul(
#ifdef ENABLE_KUNLUN_API
#ifdef ENABLE_KUNLUN_API
CALCULATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
CALCULATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CALCULATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -164,6 +176,9 @@ infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc) {
...
@@ -164,6 +176,9 @@ infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc) {
#ifdef ENABLE_KUNLUN_API
#ifdef ENABLE_KUNLUN_API
DELETE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
DELETE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#endif
#ifdef ENABLE_MOORE_API
DELETE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/silu/moore/silu_moore.h
0 → 100644
View file @
20a45b60
#ifndef __SILU_MOORE_API_H__
#define __SILU_MOORE_API_H__

#include "../../../elementwise/moore/elementwise_moore_api.h"

// Declares the public descriptor type for the Moore (MUSA) backend of the
// element-wise `silu` operator via the shared ELEMENTWISE_DESCRIPTOR macro.
ELEMENTWISE_DESCRIPTOR(silu, moore)

#endif // __SILU_MOORE_API_H__
src/infiniop/ops/silu/moore/silu_moore.mu
0 → 100644
View file @
20a45b60
#include "silu_moore.h"
#include "../../../elementwise/moore/elementwise_moore.h"
#include "silu_moore_kernel.h"
namespace op::silu::moore {
// All owned state is released by member destructors; nothing extra to do.
Descriptor::~Descriptor() = default;
// Validates descriptors for element-wise SiLU (x * sigmoid(x)) on the Moore
// (MUSA) backend and, on success, builds the descriptor into *desc_ptr.
// - handle_: device handle; must actually be a device::moore::Handle
// - desc_ptr: receives the newly created descriptor
// - out_desc: descriptor of the output tensor
// - input_desc_vec: descriptor of the single input tensor at index 0
// NOTE: the CHECK_* and CREATE_* macros return early with an error status
// on failure, so control only reaches the final return on success.
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::moore::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &input_desc = input_desc_vec.at(0);
const auto &output_shape = out_desc->shape();
const auto &input_shape = input_desc->shape();
// Floating-point element types only.
CHECK_DTYPE(dtype, INFINI_DTYPE_BF16, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
// Unary op: output shape must match the input shape exactly.
CHECK_SAME_SHAPE(output_shape, input_shape);
// create MOORE elementwise descriptor
CREATE_ELEMENTWISE_MOORE_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
// Launches output = silu(inputs[0]) on the given stream, dispatching on the
// dtype captured at create() time. 256 is the thread-block size forwarded
// to the shared element-wise launcher.
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *output,
std::vector<const void *> inputs,
void *stream) const {
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
switch (_dtype) {
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, moore::SiluOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F16:
return _device_info->calculate<256, moore::SiluOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, moore::SiluOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, moore::SiluOp, double>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
// Unreachable: every switch branch above returns.
return INFINI_STATUS_SUCCESS;
}
} // namespace op::silu::moore
src/infiniop/ops/silu/moore/silu_moore_kernel.h
0 → 100644
View file @
20a45b60
#ifndef __SILU_MOORE_KERNEL_H__
#define __SILU_MOORE_KERNEL_H__

#include <cmath>
#include <cstddef>
#include <type_traits>

/*
 * SiLU (x * sigmoid(x)) functor for the MUSA (Moore Threads) backend.
 * Mirrors the CUDA implementation's structure so the backends stay aligned.
 */
namespace op::silu::moore {

// Plain struct (the original C-style `typedef struct ... SiluOp;` and the
// redundant `public:` are unnecessary in C++).
struct SiluOp {
    // Unary operator: one input element per output element.
    static constexpr size_t num_inputs = 1;

    // Returns x * sigmoid(x), dispatched at compile time on the element type.
    template <typename T>
    __device__ __forceinline__ T operator()(const T &x) const {
        if constexpr (std::is_same_v<T, half2>) {
            // Vectorized FP16x2: x * (1 / (1 + exp(-x))).
            return __hmul2(x,
                           __h2div(__float2half2_rn(1.0f),
                                   __hadd2(__float2half2_rn(1.0f), h2exp(__hneg2(x)))));
        } else if constexpr (std::is_same_v<T, half>) {
            // FP16: compute in float and convert back for MUSA platform
            // compatibility.
            float x_f = __half2float(x);
            float sigmoid_f = 1.0f / (1.0f + __expf(-x_f));
            return __float2half(x_f * sigmoid_f);
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // BF16: compute in float and convert back.
            float x_f = __bfloat162float(x);
            float sigmoid_f = 1.0f / (1.0f + __expf(-x_f));
            return __float2bfloat16_rn(x_f * sigmoid_f);
        } else if constexpr (std::is_same_v<T, float>) {
            // FP32: use __frcp_rn and __expf for Moore platform compatibility.
            return __fmul_rn(x, __frcp_rn(__fadd_rn(1.0f, __expf(-x))));
        } else {
            // FP64 and any other arithmetic type. This branch was previously
            // a bare `if constexpr (std::is_same_v<T, double>)` with no final
            // else, so operator() could fall off the end without returning
            // for other instantiations (undefined behavior); the
            // unconditional else closes that hole while keeping the exact
            // double-precision formula.
            return x / (1.0 + exp(-x));
        }
    }
};

} // namespace op::silu::moore

#endif // __SILU_MOORE_KERNEL_H__
src/infiniop/ops/silu/operator.cc
View file @
20a45b60
...
@@ -11,6 +11,9 @@
...
@@ -11,6 +11,9 @@
#ifdef ENABLE_METAX_API
#ifdef ENABLE_METAX_API
#include "metax/silu_metax.h"
#include "metax/silu_metax.h"
#endif
#endif
#ifdef ENABLE_MOORE_API
#include "moore/silu_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateSiluDescriptor
(
__C
infiniStatus_t
infiniopCreateSiluDescriptor
(
infiniopHandle_t
handle
,
infiniopHandle_t
handle
,
...
@@ -40,6 +43,9 @@ __C infiniStatus_t infiniopCreateSiluDescriptor(
...
@@ -40,6 +43,9 @@ __C infiniStatus_t infiniopCreateSiluDescriptor(
#ifdef ENABLE_METAX_API
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
CREATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CREATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -67,6 +73,9 @@ __C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, s
...
@@ -67,6 +73,9 @@ __C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, s
#endif
#endif
#ifdef ENABLE_METAX_API
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
);
GET
(
INFINI_DEVICE_METAX
,
metax
);
#endif
#ifdef ENABLE_MOORE_API
GET
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -103,6 +112,9 @@ __C infiniStatus_t infiniopSilu(
...
@@ -103,6 +112,9 @@ __C infiniStatus_t infiniopSilu(
#ifdef ENABLE_METAX_API
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
#endif
#ifdef ENABLE_MOORE_API
CALCULATE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -133,6 +145,9 @@ infiniopDestroySiluDescriptor(infiniopSiluDescriptor_t desc) {
...
@@ -133,6 +145,9 @@ infiniopDestroySiluDescriptor(infiniopSiluDescriptor_t desc) {
#ifdef ENABLE_METAX_API
#ifdef ENABLE_METAX_API
DELETE
(
INFINI_DEVICE_METAX
,
metax
);
DELETE
(
INFINI_DEVICE_METAX
,
metax
);
#endif
#endif
#ifdef ENABLE_MOORE_API
DELETE
(
INFINI_DEVICE_MOORE
,
moore
);
#endif
default:
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
test/infinicore/ops/adaptive_max_pool2d.py
View file @
20a45b60
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
torch
import
infinicore
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
TestResult
from
framework.runner
import
GenericTestRunner
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework.utils
import
is_broadcast
from
framework.devices
import
InfiniDeviceEnum
# Test cases format: (in_shape, in_strides_or_None, output_size_or_None)
# Test cases format: (in_shape, in_strides_or_None, output_size_or_None)
...
@@ -63,6 +64,23 @@ class OpTest(BaseOperatorTest):
...
@@ -63,6 +64,23 @@ class OpTest(BaseOperatorTest):
def
torch_operator
(
self
,
*
args
,
**
kwargs
):
def
torch_operator
(
self
,
*
args
,
**
kwargs
):
return
torch
.
nn
.
functional
.
adaptive_max_pool2d
(
*
args
,
**
kwargs
)
return
torch
.
nn
.
functional
.
adaptive_max_pool2d
(
*
args
,
**
kwargs
)
def run_test(self, device, test_case, config):
    """Skip non-contiguous tensor tests on Moore platform (muDNN only supports contiguous tensors for pooling operations)."""
    if device == InfiniDeviceEnum.MOORE:
        first_input = test_case.inputs[0] if test_case.inputs else None
        # Custom strides on the input mean a non-contiguous tensor, which
        # muDNN pooling cannot handle; report a deliberate skip (-2).
        if isinstance(first_input, TensorSpec) and first_input.strides is not None:
            return TestResult(
                success=False,
                return_code=-2,
                test_case=test_case,
                device=device,
                error_message="muDNN only supports contiguous tensors for pooling operations",
            )
    return super().run_test(device, test_case, config)
# def infinicore_operator(self, *args, **kwargs):
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.nn.functional.adaptive_max_pool2d(*args, **kwargs)
# return infinicore.nn.functional.adaptive_max_pool2d(*args, **kwargs)
...
...
test/infinicore/ops/isin.py
View file @
20a45b60
...
@@ -22,12 +22,16 @@ _TEST_CASES_DATA = [
...
@@ -22,12 +22,16 @@ _TEST_CASES_DATA = [
]
]
_TOLERANCE_MAP
=
{
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
0
,
"rtol"
:
1e-2
},
#
infinicore.float16: {"atol": 0, "rtol": 1e-2},
infinicore
.
float32
:
{
"atol"
:
0
,
"rtol"
:
1e-3
},
infinicore
.
float32
:
{
"atol"
:
0
,
"rtol"
:
1e-3
},
infinicore
.
int32
:
{
"atol"
:
0
,
"rtol"
:
0
},
infinicore
.
int32
:
{
"atol"
:
0
,
"rtol"
:
0
},
}
}
_TENSOR_DTYPES
=
[
infinicore
.
int32
,
infinicore
.
float32
,
infinicore
.
float16
]
_TENSOR_DTYPES
=
[
infinicore
.
int32
,
infinicore
.
float32
,
# infinicore.float16,
]
def
parse_test_cases
():
def
parse_test_cases
():
...
...
test/infinicore/ops/mse_loss.py
View file @
20a45b60
...
@@ -21,12 +21,17 @@ _TEST_CASES_DATA = [
...
@@ -21,12 +21,17 @@ _TEST_CASES_DATA = [
]
]
_TOLERANCE_MAP
=
{
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-2
},
#
infinicore.float16: {"atol": 1e-3, "rtol": 1e-2},
infinicore
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-4
},
infinicore
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-4
},
infinicore
.
bfloat16
:
{
"atol"
:
1e-2
,
"rtol"
:
5e-2
},
#
infinicore.bfloat16: {"atol": 1e-2, "rtol": 5e-2},
}
}
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
_TENSOR_DTYPES
=
[
# infinicore.float16,
# some pytorch version doesn't support bfloat16
# infinicore.bfloat16,
infinicore
.
float32
,
]
def
parse_test_cases
():
def
parse_test_cases
():
...
...
test/infinicore/ops/pad.py
View file @
20a45b60
...
@@ -22,12 +22,16 @@ _TEST_CASES_DATA = [
...
@@ -22,12 +22,16 @@ _TEST_CASES_DATA = [
]
]
_TOLERANCE_MAP
=
{
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
1e-2
,
"rtol"
:
1e-2
},
#
infinicore.float16: {"atol": 1e-2, "rtol": 1e-2},
infinicore
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-4
},
infinicore
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-4
},
infinicore
.
bfloat16
:
{
"atol"
:
1e-2
,
"rtol"
:
5e-2
},
#
infinicore.bfloat16: {"atol": 1e-2, "rtol": 5e-2},
}
}
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
_TENSOR_DTYPES
=
[
# infinicore.float16,
# infinicore.bfloat16,
infinicore
.
float32
,
]
def
parse_test_cases
():
def
parse_test_cases
():
...
...
test/infinicore/ops/sort.py
View file @
20a45b60
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
torch
import
infinicore
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
TestResult
from
framework.runner
import
GenericTestRunner
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework.utils
import
is_broadcast
from
framework.devices
import
InfiniDeviceEnum
# ==============================================================================
# ==============================================================================
# Operator-specific configuration for sort
# Operator-specific configuration for sort
...
@@ -166,6 +167,37 @@ class OpTest(BaseOperatorTest):
...
@@ -166,6 +167,37 @@ class OpTest(BaseOperatorTest):
# forward to torch.sort; stable kwarg included for compatibility
# forward to torch.sort; stable kwarg included for compatibility
return
torch
.
sort
(
x
,
dim
=
dim
,
descending
=
descending
,
stable
=
stable
,
out
=
out
)
return
torch
.
sort
(
x
,
dim
=
dim
,
descending
=
descending
,
stable
=
stable
,
out
=
out
)
def run_test(self, device, test_case, config):
    """Skip non-contiguous tensor tests on Moore platform (muDNN Sort only supports contiguous tensors)."""
    if device == InfiniDeviceEnum.MOORE:

        def _skip():
            # Deliberate skip (-2): muDNN Sort cannot handle strided tensors.
            return TestResult(
                success=False,
                return_code=-2,
                test_case=test_case,
                device=device,
                error_message="muDNN Sort only supports contiguous tensors",
            )

        # The input tensor must be contiguous.
        inputs = test_case.inputs
        if inputs and isinstance(inputs[0], TensorSpec) and inputs[0].strides is not None:
            return _skip()
        # The output tensors (values and indices) must be contiguous too.
        output_specs = getattr(test_case, "output_specs", None) or (
            [test_case.output_spec] if test_case.output_spec else []
        )
        for spec in output_specs:
            if isinstance(spec, TensorSpec) and spec.strides is not None:
                return _skip()
    return super().run_test(device, test_case, config)
# def infinicore_operator(self, x, dim=-1, descending=False, stable=False, out=None, **kwargs):
# def infinicore_operator(self, x, dim=-1, descending=False, stable=False, out=None, **kwargs):
# # assume infinicore provides a similar API
# # assume infinicore provides a similar API
# return infinicore.sort(x, dim=dim, descending=descending, stable=stable, out=out)
# return infinicore.sort(x, dim=dim, descending=descending, stable=stable, out=out)
...
...
test/infinicore/ops/std.py
View file @
20a45b60
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
...
@@ -5,9 +5,10 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
torch
import
infinicore
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
TestResult
from
framework.runner
import
GenericTestRunner
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework.utils
import
is_broadcast
from
framework.devices
import
InfiniDeviceEnum
# Test cases format:
# Test cases format:
# (in_shape, in_strides_or_None, dim_or_None, correction_or_None, keepdim_or_None, out_strides_or_None)
# (in_shape, in_strides_or_None, dim_or_None, correction_or_None, keepdim_or_None, out_strides_or_None)
...
@@ -108,6 +109,37 @@ class OpTest(BaseOperatorTest):
...
@@ -108,6 +109,37 @@ class OpTest(BaseOperatorTest):
def
torch_operator
(
self
,
*
args
,
**
kwargs
):
def
torch_operator
(
self
,
*
args
,
**
kwargs
):
return
torch
.
std
(
*
args
,
**
kwargs
)
return
torch
.
std
(
*
args
,
**
kwargs
)
def run_test(self, device, test_case, config):
    """Skip non-contiguous tensor tests on Moore platform (muDNN VARIANCE & STD only support contiguous tensors)."""
    if device == InfiniDeviceEnum.MOORE:
        # Custom strides mean a non-contiguous tensor; check input and output.
        strided_input = (
            bool(test_case.inputs)
            and isinstance(test_case.inputs[0], TensorSpec)
            and test_case.inputs[0].strides is not None
        )
        strided_output = (
            bool(test_case.output_spec)
            and isinstance(test_case.output_spec, TensorSpec)
            and test_case.output_spec.strides is not None
        )
        if strided_input or strided_output:
            # Deliberate skip (-2) rather than a hard failure.
            return TestResult(
                success=False,
                return_code=-2,
                test_case=test_case,
                device=device,
                error_message="muDNN VARIANCE & STD only support contiguous tensors",
            )
    return super().run_test(device, test_case, config)
# def infinicore_operator(self, *args, **kwargs):
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.std(*args, **kwargs)
# return infinicore.std(*args, **kwargs)
...
...
test/infiniop/libinfiniop/utils.py
View file @
20a45b60
...
@@ -465,6 +465,9 @@ def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
...
@@ -465,6 +465,9 @@ def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
def filter_tensor_dtypes_by_device(device, tensor_dtypes):
    # CPU and NVIDIA devices support the full dtype list unchanged.
    if device in (InfiniDeviceEnum.CPU, InfiniDeviceEnum.NVIDIA):
        return tensor_dtypes
    if device == InfiniDeviceEnum.MOORE:
        # Drop BF16 and F64 (PyTorch does not support some operations with
        # these dtypes on the Moore platform).
        unsupported = (InfiniDtype.BF16, InfiniDtype.F64)
        return [dt for dt in tensor_dtypes if dt not in unsupported]
    # All other devices: drop torch.bfloat16.
    return [dt for dt in tensor_dtypes if dt != torch.bfloat16]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment