Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
c98e68be
Unverified
Commit
c98e68be
authored
May 13, 2025
by
goldenfox2025
Committed by
GitHub
May 13, 2025
Browse files
Merge branch 'main' into issue180
parents
d7c12d52
125afeb5
Changes
40
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
381 additions
and
11 deletions
+381
-11
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun_internal.xpu
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun_internal.xpu
+33
-0
src/infiniop/ops/swiglu/operator.cc
src/infiniop/ops/swiglu/operator.cc
+15
-0
src/infiniop/reduce/kunlun/reduce_kunlun.h
src/infiniop/reduce/kunlun/reduce_kunlun.h
+3
-1
src/infinirt/infinirt.cc
src/infinirt/infinirt.cc
+5
-1
test/infiniop-test/test_generate/testcases/swiglu.py
test/infiniop-test/test_generate/testcases/swiglu.py
+250
-0
test/infiniop/attention.py
test/infiniop/attention.py
+4
-0
test/infiniop/avg_pool.py
test/infiniop/avg_pool.py
+5
-0
test/infiniop/causal_softmax.py
test/infiniop/causal_softmax.py
+4
-0
test/infiniop/conv.py
test/infiniop/conv.py
+5
-1
test/infiniop/expand.py
test/infiniop/expand.py
+5
-1
test/infiniop/gemm.py
test/infiniop/gemm.py
+4
-0
test/infiniop/global_avg_pool.py
test/infiniop/global_avg_pool.py
+5
-1
test/infiniop/libinfiniop/utils.py
test/infiniop/libinfiniop/utils.py
+12
-0
test/infiniop/max_pool.py
test/infiniop/max_pool.py
+5
-1
test/infiniop/mlp.py
test/infiniop/mlp.py
+5
-0
test/infiniop/random_sample.py
test/infiniop/random_sample.py
+4
-0
test/infiniop/rearrange.py
test/infiniop/rearrange.py
+4
-0
test/infiniop/relu.py
test/infiniop/relu.py
+5
-1
test/infiniop/rms_norm.py
test/infiniop/rms_norm.py
+5
-2
test/infiniop/rope.py
test/infiniop/rope.py
+3
-2
No files found.
src/infiniop/ops/swiglu/kunlun/swiglu_kunlun_internal.xpu
0 → 100644
View file @
c98e68be
#ifndef __SWIGLU_KUNLUN_H__
#define __SWIGLU_KUNLUN_H__

#include "../../../devices/kunlun/kunlun_kernel_common.h"
#include "../../../elementwise/kunlun/elementwise_kunlun_kernel.h"

/// @brief SwiGLU elementwise operator evaluated in local memory:
///        out = gate * sigmoid(gate) * up, where inputs[0] = up and
///        inputs[1] = gate.
typedef struct SwiGLUOp {
private:
    /// Logistic sigmoid, 1 / (1 + e^-x), computed with float literals.
    template <typename T>
    inline __device__ T sigmoid(T x) const {
        return 1.0f / (1.0f + exp(-x));
    }

public:
    // Number of input operands this op consumes per element; the
    // elementwise kernel framework requires every op to expose this.
    static constexpr size_t num_inputs = 2;

    /// Apply SwiGLU to one (up, gate) element pair.
    template <typename T>
    inline __device__ T operator()(const T *inputs) const {
        const T up_val = inputs[0];
        const T gate_val = inputs[1];
        return gate_val * sigmoid(gate_val) * up_val;
    }
} SwiGLUOp;

// Emit the swiglu kernel launch interface for this op.
LAUNCH_ELEMENTWISE_KERNEL_IMPL(SwiGLU, SwiGLUOp)
// Explicit instantiation for float.
LAUNCH_ELEMENTWISE_KERNEL_INSTANTIATE(SwiGLU, float)
#endif // __SWIGLU_KUNLUN_H__
src/infiniop/ops/swiglu/operator.cc
View file @
c98e68be
...
...
@@ -8,6 +8,9 @@
#ifdef ENABLE_CUDA_API
#include "cuda/swiglu_cuda.cuh"
#endif
#ifdef ENABLE_KUNLUN_API
#include "kunlun/swiglu_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateSwiGLUDescriptor
(
infiniopHandle_t
handle
,
...
...
@@ -33,6 +36,9 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor(
#ifdef ENABLE_CUDA_API
CREATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
CREATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateSwiGLUDescriptor
((
BangHandle_t
)
handle
,
...
...
@@ -80,6 +86,9 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
#ifdef ENABLE_CUDA_API
GET
(
INFINI_DEVICE_NVIDIA
,
cuda
)
#endif
#ifdef ENABLE_KUNLUN_API
GET
(
INFINI_DEVICE_KUNLUN
,
kunlun
)
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetSwiGLUWorkspaceSize
((
SwiGLUBangDescriptor_t
)
desc
,
size
);
...
...
@@ -127,6 +136,9 @@ __C infiniStatus_t infiniopSwiGLU(
#ifdef ENABLE_CUDA_API
CALCULATE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
CALCULATE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangSwiGLU
((
SwiGLUBangDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
...
...
@@ -168,6 +180,9 @@ infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) {
#ifdef ENABLE_CUDA_API
DELETE
(
INFINI_DEVICE_NVIDIA
,
cuda
);
#endif
#ifdef ENABLE_KUNLUN_API
DELETE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroySwiGLUDescriptor
((
SwiGLUBangDescriptor_t
)
desc
);
...
...
src/infiniop/reduce/kunlun/reduce_kunlun.h
View file @
c98e68be
#ifndef __INFINIOP_REDUCE_KUNLUN_H__
#define __INFINIOP_REDUCE_KUNLUN_H__
#include "../../devices/kunlun/kunlun_common.h"
#include "../../devices/kunlun/kunlun_
kernel_
common.h"
namespace
op
::
common_kunlun
::
reduce_op
{
using
namespace
device
::
kunlun
::
kernel
;
// Use 16 floats instruction to calculate reduce
// data_ptr is the pointer of LM
static
inline
__device__
float
sumSquaredF32
(
float
*
data_ptr
,
int
count
)
{
...
...
src/infinirt/infinirt.cc
View file @
c98e68be
...
...
@@ -4,6 +4,7 @@
#include "bang/infinirt_bang.h"
#include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh"
#include "kunlun/infinirt_kunlun.h"
#include "maca/infinirt_maca.h"
#include "musa/infinirt_musa.h"
...
...
@@ -66,8 +67,11 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
case INFINI_DEVICE_MOORE: \
_status = infinirt::musa::API PARAMS; \
break; \
case INFINI_DEVICE_KUNLUN: \
_status = infinirt::kunlun::API PARAMS; \
break; \
default: \
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
\
_status =
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
} \
{ ACTION; } \
return _status; \
...
...
test/infiniop-test/test_generate/testcases/swiglu.py
0 → 100644
View file @
c98e68be
import
numpy
as
np
import
gguf
from
typing
import
List
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
def swiglu(
    a: np.ndarray,
    b: np.ndarray,
):
    """Reference SwiGLU: a * b * sigmoid(b), written as a * b / (1 + e^-b)."""
    return a * b / (1.0 + np.exp(-b))
class SwiGLUTestCase(InfiniopTestCase):
    """One SwiGLU test sample: operand tensors a, b, c and their optional strides."""

    def __init__(
        self,
        a: np.ndarray,
        stride_a: List[int] | None,
        b: np.ndarray,
        stride_b: List[int] | None,
        c: np.ndarray,
        stride_c: List[int] | None,
    ):
        super().__init__("swiglu")
        # a, b are the inputs; c is the (pre-allocated) output tensor.
        self.a = a
        self.stride_a = stride_a
        self.b = b
        self.stride_b = stride_b
        self.c = c
        self.stride_c = stride_c

    def write_test(self, test_writer: "InfiniopTestWriter"):
        """Serialize strides, the operand tensors, and the float64 reference answer."""
        super().write_test(test_writer)
        # Strides are written only when explicitly given (None means contiguous).
        for name, stride in (
            ("a", self.stride_a),
            ("b", self.stride_b),
            ("c", self.stride_c),
        ):
            if stride is not None:
                test_writer.add_array(test_writer.gguf_key(f"{name}.strides"), stride)
        for name, tensor in (("a", self.a), ("b", self.b), ("c", self.c)):
            test_writer.add_tensor(
                test_writer.gguf_key(name),
                tensor,
                raw_dtype=np_dtype_to_ggml(tensor.dtype),
            )
        # Reference answer is computed in float64 so low-precision operand
        # dtypes (e.g. float16) are compared against a high-precision result.
        ans = swiglu(
            self.a.astype(np.float64),
            self.b.astype(np.float64),
        )
        test_writer.add_tensor(
            test_writer.gguf_key("ans"), ans, raw_dtype=gguf.GGMLQuantizationType.F64
        )
if __name__ == "__main__":
    test_writer = InfiniopTestWriter("swiglu.gguf")

    def _case(shape, dtype, stride_a=None, stride_b=None, stride_c=None):
        # Build one test case. Tensors are drawn in a/b/c order so the RNG
        # stream matches a literal per-case enumeration.
        return SwiGLUTestCase(
            np.random.rand(*shape).astype(dtype),
            stride_a,
            np.random.rand(*shape).astype(dtype),
            stride_b,
            np.random.rand(*shape).astype(dtype),
            stride_c,
        )

    test_cases = [
        # contiguous 2-D cases
        _case((64, 128), np.float32),
        _case((64, 121), np.float32),
        _case((15, 512), np.float32),
        _case((13, 4), np.float32),
        _case((13, 4), np.float16),
        # padded-row 2-D strides
        _case((13, 4), np.float32, gguf_strides(10, 1), gguf_strides(10, 1), gguf_strides(10, 1)),
        _case((13, 4), np.float16, gguf_strides(10, 1), gguf_strides(10, 1), gguf_strides(10, 1)),
        # contiguous and strided 3-D cases
        _case((13, 4, 4), np.float32),
        _case((13, 4, 4), np.float16),
        _case((13, 4, 4), np.float32, gguf_strides(20, 4, 1), gguf_strides(20, 4, 1), gguf_strides(20, 4, 1)),
        _case((13, 4, 4), np.float16, gguf_strides(20, 4, 1), gguf_strides(20, 4, 1), gguf_strides(20, 4, 1)),
        # large 2-D cases
        _case((16, 5632), np.float32),
        _case((16, 5632), np.float16),
        _case((16, 5632), np.float32, gguf_strides(13312, 1), gguf_strides(13312, 1), gguf_strides(13312, 1)),
        _case((16, 5632), np.float16, gguf_strides(13312, 1), gguf_strides(13312, 1), gguf_strides(13312, 1)),
        # transposed / non-contiguous output strides
        _case((16, 5632), np.float32, gguf_strides(5632, 1), gguf_strides(5632, 1), gguf_strides(1, 16)),
        _case((16, 5632), np.float16, gguf_strides(5632, 1), gguf_strides(5632, 1), gguf_strides(1, 16)),
        _case((2, 3, 400), np.float32, gguf_strides(1200, 400, 1), gguf_strides(1200, 400, 1), gguf_strides(1, 2, 6)),
        _case((2, 3, 400), np.float16, gguf_strides(1200, 400, 1), gguf_strides(1200, 400, 1), gguf_strides(1, 2, 6)),
        # large 3-D cases
        _case((4, 4, 5632), np.float32),
        _case((4, 4, 5632), np.float16),
        _case((4, 4, 5632), np.float32, gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1)),
        _case((4, 4, 5632), np.float16, gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1)),
    ]
    test_writer.add_tests(test_cases)
    test_writer.save()
test/infiniop/attention.py
View file @
c98e68be
...
...
@@ -101,6 +101,7 @@ def test(
v_stride
=
None
,
k_cache_stride
=
None
,
v_cache_stride
=
None
,
sync
=
None
):
print
(
f
"Testing Attention on
{
torch_device
}
with n_q_head:
{
n_q_head
}
n_kv_head:
{
n_kv_head
}
seq_len:
{
seq_len
}
head_dim:
{
head_dim
}
pos:
{
pos
}
"
...
...
@@ -139,6 +140,9 @@ def test(
v_tensor
=
to_tensor
(
v
,
lib
)
k_cache_tensor
=
to_tensor
(
k_cache
,
lib
)
v_cache_tensor
=
to_tensor
(
v_cache
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopAttentionDescriptor_t
()
check_error
(
...
...
test/infiniop/avg_pool.py
View file @
c98e68be
...
...
@@ -88,6 +88,7 @@ def test(
padding
,
strides
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing AvgPool on
{
torch_device
}
with x_shape:
{
x_shape
}
kernel_shape:
{
k_shape
}
padding:
{
padding
}
strides:
{
strides
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -109,6 +110,10 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopAvgPoolDescriptor_t
()
check_error
(
...
...
test/infiniop/causal_softmax.py
View file @
c98e68be
...
...
@@ -87,6 +87,7 @@ def test(
y_stride
=
None
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing CausalSoftmax on
{
torch_device
}
with shape:
{
shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
dtype
}
inplace:
{
inplace
}
"
...
...
@@ -107,6 +108,9 @@ def test(
y
=
torch
.
zeros
(
shape
,
dtype
=
dtype
).
to
(
torch_device
)
y
=
rearrange_if_needed
(
y
,
y_stride
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopCausalSoftmaxDescriptor_t
()
check_error
(
...
...
test/infiniop/conv.py
View file @
c98e68be
...
...
@@ -95,6 +95,7 @@ def test(
dilations
,
tensor_stride
=
None
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
assert
len
(
pads
)
==
len
(
strides
)
==
len
(
dilations
)
print
(
...
...
@@ -118,8 +119,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
w_tensor
=
to_tensor
(
w
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopConvDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopConvDescriptor_t
()
check_error
(
lib
.
infiniopCreateConvDescriptor
(
handle
,
...
...
test/infiniop/expand.py
View file @
c98e68be
...
...
@@ -52,6 +52,7 @@ def test(
y_stride
=
None
,
x_stride
=
None
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Expand on
{
torch_device
}
with x_shape:
{
x_shape
}
y_shape:
{
y_shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -76,8 +77,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopExpandDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopExpandDescriptor_t
()
check_error
(
lib
.
infiniopCreateExpandDescriptor
(
handle
,
...
...
test/infiniop/gemm.py
View file @
c98e68be
...
...
@@ -83,6 +83,7 @@ def test(
b_stride
=
None
,
c_stride
=
None
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Gemm on
{
torch_device
}
with alpha:
{
alpha
}
, beta:
{
beta
}
,"
...
...
@@ -104,6 +105,9 @@ def test(
]
a_tensor
,
b_tensor
,
c_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
a
,
b
,
c
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopGemmDescriptor_t
()
check_error
(
lib
.
infiniopCreateGemmDescriptor
(
...
...
test/infiniop/global_avg_pool.py
View file @
c98e68be
...
...
@@ -51,6 +51,7 @@ def test(
torch_device
,
x_shape
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing GlobalAvgPool on
{
torch_device
}
with input tensor_shape:
{
x_shape
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -70,8 +71,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopGlobalAvgPoolDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopGlobalAvgPoolDescriptor_t
()
check_error
(
lib
.
infiniopCreateGlobalAvgPoolDescriptor
(
handle
,
...
...
test/infiniop/libinfiniop/utils.py
View file @
c98e68be
...
...
@@ -423,6 +423,7 @@ def test_operator(lib, device, test_func, test_cases, tensor_dtypes):
infiniDeviceEnum_str_map
[
device
],
*
test_case
,
tensor_dtype
,
get_sync_func
(
device
)
)
finally
:
destroy_handle
(
lib
,
handle
)
...
...
@@ -471,3 +472,14 @@ def get_test_devices(args):
devices_to_test
=
[
InfiniDeviceEnum
.
CPU
]
return
devices_to_test
def
get_sync_func
(
device
):
import
torch
if
device
==
"cpu"
:
sync
=
None
else
:
sync
=
getattr
(
torch
,
infiniDeviceEnum_str_map
[
device
]).
synchronize
return
sync
test/infiniop/max_pool.py
View file @
c98e68be
...
...
@@ -83,6 +83,7 @@ def test(
padding
,
strides
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing MaxPool on
{
torch_device
}
with x_shape:
{
x_shape
}
kernel_shape:
{
k_shape
}
padding:
{
padding
}
strides:
{
strides
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -104,8 +105,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopMaxPoolDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopMaxPoolDescriptor_t
()
check_error
(
lib
.
infiniopCreateMaxPoolDescriptor
(
handle
,
...
...
test/infiniop/mlp.py
View file @
c98e68be
...
...
@@ -65,6 +65,7 @@ def test(
y_stride
=
None
,
w12_stride
=
None
,
w3_stride
=
None
,
sync
=
None
):
print
(
f
"Testing MLP on
{
torch_device
}
with num_tokens:
{
num_tokens
}
hidden_size:
{
hidden_size
}
intermediate_size:
{
intermediate_size
}
"
...
...
@@ -97,6 +98,10 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
w12_tensor
=
to_tensor
(
w12
,
lib
)
w3_tensor
=
to_tensor
(
w3
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopMLPDescriptor_t
()
check_error
(
lib
.
infiniopCreateMLPDescriptor
(
...
...
test/infiniop/random_sample.py
View file @
c98e68be
...
...
@@ -103,6 +103,7 @@ def test(
topk
,
temperature
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing RandomSample on
{
torch_device
}
with voc:
{
voc
}
random_val:
{
random_val
}
topp:
{
topp
}
topk:
{
topk
}
temperature:
{
temperature
}
dtype:
{
dtype
}
"
...
...
@@ -122,6 +123,9 @@ def test(
indices_tensor
.
descriptor
.
contents
.
dt
=
InfiniDtype
.
U64
# treat int64 as uint64
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRandomSampleDescriptor_t
()
check_error
(
lib
.
infiniopCreateRandomSampleDescriptor
(
...
...
test/infiniop/rearrange.py
View file @
c98e68be
...
...
@@ -131,6 +131,7 @@ def test(
x_stride
,
y_stride
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Rerrange on
{
torch_device
}
with shape:
{
shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
dtype
}
"
...
...
@@ -145,6 +146,9 @@ def test(
]
x_tensor
,
y_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
x
,
y
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRearrangeDescriptor_t
()
check_error
(
...
...
test/infiniop/relu.py
View file @
c98e68be
...
...
@@ -55,6 +55,7 @@ def test(
tensor_shape
,
tensor_dtype
=
torch
.
float16
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
sync
=
None
):
print
(
f
"Testing Relu on
{
torch_device
}
with tensor_shape:
{
tensor_shape
}
dtype:
{
tensor_dtype
}
inplace:
{
inplace
.
name
}
"
...
...
@@ -78,8 +79,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
inplace
==
Inplace
.
OUT_OF_PLACE
else
x_tensor
descriptor
=
infiniopReluDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopReluDescriptor_t
()
check_error
(
lib
.
infiniopCreateReluDescriptor
(
handle
,
...
...
test/infiniop/rms_norm.py
View file @
c98e68be
...
...
@@ -72,6 +72,7 @@ def test(
x_stride
,
w_dtype
=
torch
.
float16
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing RMS_Norm on
{
torch_device
}
with y_shape:
{
y_shape
}
x_shape:
{
x_shape
}
w_shape:
{
w_shape
}
"
...
...
@@ -89,9 +90,11 @@ def test(
rearrange_if_needed
(
tensor
,
stride
)
for
tensor
,
stride
in
zip
([
x
,
y
],
[
x_stride
,
y_stride
])
]
x_tensor
,
y_tensor
,
w_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
x
,
y
,
w
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRMSNormDescriptor_t
()
check_error
(
...
...
test/infiniop/rope.py
View file @
c98e68be
...
...
@@ -117,6 +117,7 @@ def test(
y_strides
=
None
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
dtype
=
torch
.
float32
,
sync
=
None
):
if
inplace
==
Inplace
.
INPLACE_X
:
y_strides
=
x_strides
...
...
@@ -147,8 +148,8 @@ def test(
else
:
y_tensor
=
to_tensor
(
y
,
lib
)
if
torch_device
==
"npu"
:
sync
hronize_device
(
torch_device
)
if
sync
is
not
None
:
sync
(
)
check_error
(
lib
.
infiniopCreateRoPEDescriptor
(
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment