Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
9b32b4b1
Commit
9b32b4b1
authored
Jun 04, 2025
by
Catheriany
Browse files
Merge remote-tracking branch 'origin/main' into issue/150
parents
15bcbdfc
4799ddbf
Changes
103
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
960 additions
and
270 deletions
+960
-270
test/infiniop-test/test_generate/testcases/add.py
test/infiniop-test/test_generate/testcases/add.py
+132
-0
test/infiniop-test/test_generate/testcases/clip.py
test/infiniop-test/test_generate/testcases/clip.py
+242
-0
test/infiniop-test/test_generate/testcases/mul.py
test/infiniop-test/test_generate/testcases/mul.py
+60
-95
test/infiniop-test/test_generate/testcases/swiglu.py
test/infiniop-test/test_generate/testcases/swiglu.py
+117
-0
test/infiniop/attention.py
test/infiniop/attention.py
+84
-165
test/infiniop/avg_pool.py
test/infiniop/avg_pool.py
+5
-0
test/infiniop/causal_softmax.py
test/infiniop/causal_softmax.py
+8
-1
test/infiniop/clip.py
test/infiniop/clip.py
+246
-0
test/infiniop/conv.py
test/infiniop/conv.py
+5
-1
test/infiniop/expand.py
test/infiniop/expand.py
+5
-1
test/infiniop/gemm.py
test/infiniop/gemm.py
+4
-0
test/infiniop/global_avg_pool.py
test/infiniop/global_avg_pool.py
+5
-1
test/infiniop/libinfiniop/utils.py
test/infiniop/libinfiniop/utils.py
+13
-0
test/infiniop/max_pool.py
test/infiniop/max_pool.py
+5
-1
test/infiniop/mlp.py
test/infiniop/mlp.py
+5
-0
test/infiniop/random_sample.py
test/infiniop/random_sample.py
+4
-0
test/infiniop/rearrange.py
test/infiniop/rearrange.py
+4
-0
test/infiniop/relu.py
test/infiniop/relu.py
+5
-1
test/infiniop/rms_norm.py
test/infiniop/rms_norm.py
+5
-2
test/infiniop/rope.py
test/infiniop/rope.py
+6
-2
No files found.
test/infiniop-test/test_generate/testcases/add.py
0 → 100644
View file @
9b32b4b1
from
ast
import
List
import
numpy
as
np
import
gguf
from
typing
import
List
from
numpy.lib.stride_tricks
import
as_strided
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
,
contiguous_gguf_strides
def
add
(
a
:
np
.
ndarray
,
b
:
np
.
ndarray
,
):
return
a
+
b
def
process_tensor
(
a
,
b
,
stride_a
=
None
,
stride_b
=
None
):
def
normalize_stride
(
tensor
,
stride
):
if
stride
:
slices
=
tuple
(
slice
(
0
,
1
)
if
s
==
0
else
slice
(
None
)
for
s
in
stride
)
return
tensor
[
slices
]
else
:
return
tensor
a_unique
=
normalize_stride
(
a
,
stride_a
)
b_unique
=
normalize_stride
(
b
,
stride_b
)
return
a_unique
,
b_unique
class
AddTestCase
(
InfiniopTestCase
):
def
__init__
(
self
,
a
:
np
.
ndarray
,
shape_a
:
List
[
int
]
|
None
,
stride_a
:
List
[
int
]
|
None
,
b
:
np
.
ndarray
,
shape_b
:
List
[
int
]
|
None
,
stride_b
:
List
[
int
]
|
None
,
c
:
np
.
ndarray
,
shape_c
:
List
[
int
]
|
None
,
stride_c
:
List
[
int
]
|
None
,
):
super
().
__init__
(
"add"
)
self
.
a
=
a
self
.
shape_a
=
shape_a
self
.
stride_a
=
stride_a
self
.
b
=
b
self
.
shape_b
=
shape_b
self
.
stride_b
=
stride_b
self
.
c
=
c
self
.
shape_c
=
shape_c
self
.
stride_c
=
stride_c
def
write_test
(
self
,
test_writer
:
"InfiniopTestWriter"
):
super
().
write_test
(
test_writer
)
if
self
.
shape_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.shape"
),
self
.
shape_a
)
if
self
.
shape_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.shape"
),
self
.
shape_b
)
if
self
.
shape_c
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.shape"
),
self
.
shape_c
)
if
self
.
stride_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.strides"
),
gguf_strides
(
*
self
.
stride_a
))
if
self
.
stride_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.strides"
),
gguf_strides
(
*
self
.
stride_b
))
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.strides"
),
gguf_strides
(
*
self
.
stride_c
if
self
.
stride_c
is
not
None
else
contiguous_gguf_strides
(
self
.
shape_c
))
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"a"
),
self
.
a
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
a
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"b"
),
self
.
b
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
b
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"c"
),
self
.
c
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
c
.
dtype
)
)
ans
=
add
(
self
.
a
.
astype
(
np
.
float64
),
self
.
b
.
astype
(
np
.
float64
),
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"ans"
),
ans
,
raw_dtype
=
gguf
.
GGMLQuantizationType
.
F64
)
if
__name__
==
"__main__"
:
test_writer
=
InfiniopTestWriter
(
"add.gguf"
)
test_cases
=
[]
# ==============================================================================
# Configuration (Internal Use Only)
# ==============================================================================
# These are not meant to be imported from other modules
_TEST_CASES_
=
[
# shape, a_stride, b_stride, c_stride
((
13
,
4
),
None
,
None
,
None
),
((
13
,
4
),
(
10
,
1
),
(
10
,
1
),
(
10
,
1
)),
((
13
,
4
),
(
0
,
1
),
None
,
None
),
((
13
,
4
,
4
),
None
,
None
,
None
),
((
13
,
4
,
4
),
(
20
,
4
,
1
),
(
20
,
4
,
1
),
(
20
,
4
,
1
)),
((
13
,
4
,
4
),
(
4
,
0
,
1
),
(
0
,
4
,
1
),
None
),
((
16
,
5632
),
None
,
None
,
None
),
((
16
,
5632
),
(
13312
,
1
),
(
13312
,
1
),
(
13312
,
1
)),
((
4
,
4
,
5632
),
None
,
None
,
None
),
((
4
,
4
,
5632
),
(
45056
,
5632
,
1
),
(
45056
,
5632
,
1
),
(
45056
,
5632
,
1
)),
]
_TENSOR_DTYPES_
=
[
np
.
float32
,
np
.
float16
]
for
dtype
in
_TENSOR_DTYPES_
:
for
shape
,
stride_a
,
stride_b
,
stride_c
in
_TEST_CASES_
:
a
=
np
.
random
.
rand
(
*
shape
).
astype
(
dtype
)
b
=
np
.
random
.
rand
(
*
shape
).
astype
(
dtype
)
c
=
np
.
empty
(
tuple
(
0
for
_
in
shape
),
dtype
=
dtype
)
a
,
b
=
process_tensor
(
a
,
b
,
stride_a
,
stride_b
)
if
stride_c
is
None
:
stride_c
=
contiguous_gguf_strides
(
shape
)
test_case
=
AddTestCase
(
a
=
a
,
shape_a
=
shape
,
stride_a
=
stride_a
,
b
=
b
,
shape_b
=
shape
,
stride_b
=
stride_b
,
c
=
c
,
shape_c
=
shape
,
stride_c
=
stride_c
,
)
test_cases
.
append
(
test_case
)
test_writer
.
add_tests
(
test_cases
)
test_writer
.
save
()
\ No newline at end of file
test/infiniop-test/test_generate/testcases/clip.py
0 → 100644
View file @
9b32b4b1
import
numpy
as
np
import
gguf
from
typing
import
List
,
Optional
,
Tuple
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
def
clip
(
x
:
np
.
ndarray
,
min_val
:
np
.
ndarray
,
max_val
:
np
.
ndarray
,
)
->
np
.
ndarray
:
"""
Clip the values in input tensor x to the range [min_val, max_val].
Args:
x: Input tensor
min_val: Tensor with minimum values (same shape as x)
max_val: Tensor with maximum values (same shape as x)
Returns:
Clipped tensor with the same shape as x
"""
return
np
.
maximum
(
np
.
minimum
(
x
,
max_val
),
min_val
)
def
random_tensor
(
shape
,
dtype
):
"""
Generate a random tensor with values in the range [-2, 2].
Args:
shape: Shape of the tensor
dtype: Data type of the tensor
Returns:
Random tensor with the specified shape and dtype
"""
return
(
np
.
random
.
rand
(
*
shape
).
astype
(
dtype
)
*
4.0
-
2.0
)
class
ClipTestCase
(
InfiniopTestCase
):
"""
Test case for the Clip operator.
"""
def
__init__
(
self
,
x
:
np
.
ndarray
,
x_stride
:
Optional
[
List
[
int
]],
min_val
:
np
.
ndarray
,
min_stride
:
Optional
[
List
[
int
]],
max_val
:
np
.
ndarray
,
max_stride
:
Optional
[
List
[
int
]],
y
:
np
.
ndarray
,
y_stride
:
Optional
[
List
[
int
]],
):
super
().
__init__
(
"clip"
)
self
.
x
=
x
self
.
x_stride
=
x_stride
self
.
min_val
=
min_val
self
.
min_stride
=
min_stride
self
.
max_val
=
max_val
self
.
max_stride
=
max_stride
self
.
y
=
y
self
.
y_stride
=
y_stride
def
write_test
(
self
,
test_writer
:
"InfiniopTestWriter"
):
super
().
write_test
(
test_writer
)
# Add strides as arrays if they exist
if
self
.
x_stride
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"x.strides"
),
self
.
x_stride
)
if
self
.
min_stride
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"min_val.strides"
),
self
.
min_stride
)
if
self
.
max_stride
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"max_val.strides"
),
self
.
max_stride
)
if
self
.
y_stride
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"y.strides"
),
self
.
y_stride
)
# Add tensors to the test
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"x"
),
self
.
x
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
x
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"min_val"
),
self
.
min_val
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
min_val
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"max_val"
),
self
.
max_val
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
max_val
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"y"
),
self
.
y
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
y
.
dtype
)
)
# Calculate the expected result
ans
=
clip
(
self
.
x
.
astype
(
np
.
float64
),
self
.
min_val
.
astype
(
np
.
float64
),
self
.
max_val
.
astype
(
np
.
float64
)
)
# Add the expected result to the test
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"ans"
),
ans
,
raw_dtype
=
gguf
.
GGMLQuantizationType
.
F64
)
if
__name__
==
"__main__"
:
test_writer
=
InfiniopTestWriter
(
"clip.gguf"
)
# Create test cases for different shapes, strides, and data types
test_cases
=
[]
# Test case shapes
shapes
=
[
(
10
,),
# 1D tensor
(
5
,
10
),
# 2D tensor
(
2
,
3
,
4
),
# 3D tensor
(
7
,
13
),
# Prime dimensions
(
1
,
1
),
# Minimum shape
(
100
,
100
),
# Large shape
(
16
,
16
,
16
),
# Large 3D
]
# Test case min/max values
min_max_values
=
[
(
-
1.0
,
1.0
),
# Standard range
(
0.0
,
2.0
),
# Positive range
(
-
2.0
,
0.0
),
# Negative range
(
-
1000.0
,
1000.0
),
# Large range
(
-
0.001
,
0.001
),
# Small range
(
0.0
,
0.0
),
# min=max
]
# Data types to test
dtypes
=
[
np
.
float16
,
np
.
float32
,
np
.
float64
]
# Generate test cases with contiguous tensors
for
shape
in
shapes
:
for
min_val
,
max_val
in
min_max_values
:
for
dtype
in
dtypes
:
x
=
random_tensor
(
shape
,
dtype
)
min_tensor
=
np
.
full
(
shape
,
min_val
,
dtype
=
dtype
)
max_tensor
=
np
.
full
(
shape
,
max_val
,
dtype
=
dtype
)
y
=
np
.
zeros
(
shape
,
dtype
=
dtype
)
test_cases
.
append
(
ClipTestCase
(
x
=
x
,
x_stride
=
None
,
min_val
=
min_tensor
,
min_stride
=
None
,
max_val
=
max_tensor
,
max_stride
=
None
,
y
=
y
,
y_stride
=
None
)
)
# Generate test cases with strided tensors (for 2D shapes only)
for
shape
in
[
s
for
s
in
shapes
if
len
(
s
)
==
2
]:
for
dtype
in
dtypes
:
# Row-major stride
row_stride
=
gguf_strides
(
shape
[
1
],
1
)
# Column-major stride
col_stride
=
gguf_strides
(
1
,
shape
[
0
])
# Test case with row-major input and output
x
=
random_tensor
(
shape
,
dtype
)
min_tensor
=
np
.
full
(
shape
,
-
1.0
,
dtype
=
dtype
)
max_tensor
=
np
.
full
(
shape
,
1.0
,
dtype
=
dtype
)
y
=
np
.
zeros
(
shape
,
dtype
=
dtype
)
test_cases
.
append
(
ClipTestCase
(
x
=
x
,
x_stride
=
row_stride
,
min_val
=
min_tensor
,
min_stride
=
row_stride
,
max_val
=
max_tensor
,
max_stride
=
row_stride
,
y
=
y
,
y_stride
=
row_stride
)
)
# Test case with column-major input and output
x
=
random_tensor
(
shape
,
dtype
)
min_tensor
=
np
.
full
(
shape
,
-
1.0
,
dtype
=
dtype
)
max_tensor
=
np
.
full
(
shape
,
1.0
,
dtype
=
dtype
)
y
=
np
.
zeros
(
shape
,
dtype
=
dtype
)
test_cases
.
append
(
ClipTestCase
(
x
=
x
,
x_stride
=
col_stride
,
min_val
=
min_tensor
,
min_stride
=
col_stride
,
max_val
=
max_tensor
,
max_stride
=
col_stride
,
y
=
y
,
y_stride
=
col_stride
)
)
# Test case with different strides for input and output
x
=
random_tensor
(
shape
,
dtype
)
min_tensor
=
np
.
full
(
shape
,
-
1.0
,
dtype
=
dtype
)
max_tensor
=
np
.
full
(
shape
,
1.0
,
dtype
=
dtype
)
y
=
np
.
zeros
(
shape
,
dtype
=
dtype
)
test_cases
.
append
(
ClipTestCase
(
x
=
x
,
x_stride
=
row_stride
,
min_val
=
min_tensor
,
min_stride
=
row_stride
,
max_val
=
max_tensor
,
max_stride
=
row_stride
,
y
=
y
,
y_stride
=
col_stride
)
)
# Add all test cases to the writer
test_writer
.
add_tests
(
test_cases
)
# Save the test cases to a GGUF file
test_writer
.
save
()
print
(
f
"Generated
{
len
(
test_cases
)
}
test cases for the Clip operator"
)
test/infiniop-test/test_generate/testcases/mul.py
View file @
9b32b4b1
...
...
@@ -2,7 +2,7 @@ import numpy as np
import
gguf
from
typing
import
List
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
,
contiguous_gguf_strides
def
mul
(
a
:
np
.
ndarray
,
...
...
@@ -19,28 +19,44 @@ class MulTestCase(InfiniopTestCase):
def
__init__
(
self
,
a
:
np
.
ndarray
,
shape_a
:
List
[
int
]
|
None
,
stride_a
:
List
[
int
]
|
None
,
b
:
np
.
ndarray
,
shape_b
:
List
[
int
]
|
None
,
stride_b
:
List
[
int
]
|
None
,
c
:
np
.
ndarray
,
shape_c
:
List
[
int
]
|
None
,
stride_c
:
List
[
int
]
|
None
,
):
super
().
__init__
(
"mul"
)
self
.
a
=
a
self
.
shape_a
=
shape_a
self
.
stride_a
=
stride_a
self
.
b
=
b
self
.
shape_b
=
shape_b
self
.
stride_b
=
stride_b
self
.
c
=
c
self
.
shape_c
=
shape_c
self
.
stride_c
=
stride_c
def
write_test
(
self
,
test_writer
:
"InfiniopTestWriter"
):
super
().
write_test
(
test_writer
)
if
self
.
shape_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.shape"
),
self
.
shape_a
)
if
self
.
shape_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.shape"
),
self
.
shape_b
)
if
self
.
shape_c
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.shape"
),
self
.
shape_c
)
if
self
.
stride_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.strides"
),
self
.
stride_a
)
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.strides"
),
gguf_strides
(
*
self
.
stride_a
)
)
if
self
.
stride_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.strides"
),
self
.
stride_b
)
if
self
.
stride_c
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.strides"
),
self
.
stride_c
)
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.strides"
),
gguf_strides
(
*
self
.
stride_b
))
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.strides"
),
gguf_strides
(
*
self
.
stride_c
if
self
.
stride_c
is
not
None
else
contiguous_gguf_strides
(
self
.
shape_c
))
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"a"
),
self
.
a
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
a
.
dtype
)
)
...
...
@@ -52,6 +68,7 @@ class MulTestCase(InfiniopTestCase):
)
a_fp64
=
self
.
a
.
astype
(
np
.
float64
)
b_fp64
=
self
.
b
.
astype
(
np
.
float64
)
ans_fp64
=
np
.
multiply
(
a_fp64
,
b_fp64
)
ans
=
mul
(
self
.
a
,
self
.
b
)
test_writer
.
add_tensor
(
...
...
@@ -65,95 +82,43 @@ class MulTestCase(InfiniopTestCase):
if
__name__
==
'__main__'
:
test_writer
=
InfiniopTestWriter
(
"mul.gguf"
)
test_cases
=
[
MulTestCase
(
random_tensor
((
2
,
3
),
np
.
float32
),
gguf_strides
(
3
,
1
),
random_tensor
((
2
,
3
),
np
.
float32
),
gguf_strides
(
1
,
2
),
random_tensor
((
2
,
3
),
np
.
float32
),
gguf_strides
(
3
,
1
),
),
MulTestCase
(
random_tensor
((
2
,
3
),
np
.
float16
),
gguf_strides
(
1
,
2
),
random_tensor
((
2
,
3
),
np
.
float16
),
gguf_strides
(
3
,
1
),
random_tensor
((
2
,
3
),
np
.
float16
),
gguf_strides
(
1
,
2
),
),
MulTestCase
(
random_tensor
((
2
,
3
),
np
.
float64
),
gguf_strides
(
3
,
1
),
random_tensor
((
2
,
3
),
np
.
float64
),
gguf_strides
(
3
,
1
),
random_tensor
((
2
,
3
),
np
.
float64
),
gguf_strides
(
1
,
2
),
),
MulTestCase
(
random_tensor
((
4
,
6
),
np
.
float16
),
gguf_strides
(
1
,
4
),
random_tensor
((
4
,
6
),
np
.
float16
),
gguf_strides
(
1
,
5
),
random_tensor
((
4
,
6
),
np
.
float16
),
gguf_strides
(
6
,
1
),
),
MulTestCase
(
random_tensor
((
1
,
2048
),
np
.
float16
),
gguf_strides
(
1
,
1
),
random_tensor
((
1
,
2048
),
np
.
float16
),
gguf_strides
(
2048
,
1
),
random_tensor
((
1
,
2048
),
np
.
float16
),
gguf_strides
(
1
,
1
),
),
MulTestCase
(
random_tensor
((
2048
,
2048
),
np
.
float32
),
None
,
random_tensor
((
2048
,
2048
),
np
.
float32
),
gguf_strides
(
1
,
2048
),
random_tensor
((
2048
,
2048
),
np
.
float32
),
None
,
),
MulTestCase
(
random_tensor
((
2
,
4
,
2048
),
np
.
float16
),
gguf_strides
(
4
*
2048
,
2048
,
1
),
random_tensor
((
2
,
4
,
2048
),
np
.
float16
),
gguf_strides
(
1
,
2
,
2
*
4
),
random_tensor
((
2
,
4
,
2048
),
np
.
float16
),
gguf_strides
(
4
*
2048
,
2048
,
1
),
),
MulTestCase
(
random_tensor
((
2
,
4
,
2048
),
np
.
float32
),
gguf_strides
(
1
,
2
,
2
*
4
),
random_tensor
((
2
,
4
,
2048
),
np
.
float32
),
None
,
random_tensor
((
2
,
4
,
2048
),
np
.
float32
),
gguf_strides
(
1
,
2
,
2
*
4
),
),
MulTestCase
(
random_tensor
((
2048
,
2560
),
np
.
float32
),
gguf_strides
(
2560
,
1
),
random_tensor
((
2048
,
2560
),
np
.
float32
),
gguf_strides
(
1
,
2048
),
random_tensor
((
2048
,
2560
),
np
.
float32
),
gguf_strides
(
2560
,
1
),
),
MulTestCase
(
random_tensor
((
4
,
48
,
64
),
np
.
float16
),
gguf_strides
(
64
*
48
,
64
,
1
),
random_tensor
((
4
,
48
,
64
),
np
.
float16
),
gguf_strides
(
1
,
4
,
4
*
48
),
random_tensor
((
4
,
48
,
64
),
np
.
float16
),
None
),
MulTestCase
(
random_tensor
((
4
,
48
,
64
),
np
.
float32
),
None
,
random_tensor
((
4
,
48
,
64
),
np
.
float32
),
gguf_strides
(
1
,
4
,
4
*
48
),
random_tensor
((
4
,
48
,
64
),
np
.
float32
),
gguf_strides
(
48
*
64
,
64
,
1
),
)
]
test_cases
=
[]
_TEST_CASES_
=
[
((
2
,
3
),
(
3
,
1
),
(
1
,
2
),
(
3
,
1
)),
((
2
,
3
),
(
1
,
2
),
(
3
,
1
),
(
1
,
2
)),
((
2
,
3
),
(
3
,
1
),
(
3
,
1
),
(
1
,
2
)),
((
4
,
6
),
(
1
,
4
),
(
1
,
5
),
(
6
,
1
)),
((
1
,
2048
),
(
1
,
1
),
(
2048
,
1
),
(
1
,
1
)),
((
2048
,
2048
),
None
,
(
1
,
2048
),
None
),
((
2
,
4
,
2048
),
(
4
*
2048
,
2048
,
1
),
(
1
,
2
,
8
),
(
4
*
2048
,
2048
,
1
)),
((
2
,
4
,
2048
),
(
1
,
2
,
8
),
None
,
(
1
,
2
,
8
)),
((
2048
,
2560
),
(
2560
,
1
),
(
1
,
2048
),
(
2560
,
1
)),
((
4
,
48
,
64
),
(
64
*
48
,
64
,
1
),
(
1
,
4
,
192
),
None
),
((
4
,
48
,
64
),
None
,
(
1
,
4
,
192
),
(
48
*
64
,
64
,
1
)),
]
_TENSOR_DTYPES_
=
[
np
.
float32
,
np
.
float16
]
for
dtype
in
_TENSOR_DTYPES_
:
for
shape
,
stride_a
,
stride_b
,
stride_c
in
_TEST_CASES_
:
a
=
random_tensor
(
shape
,
dtype
)
b
=
random_tensor
(
shape
,
dtype
)
c
=
np
.
empty
(
tuple
(
0
for
_
in
shape
),
dtype
=
dtype
)
test_cases
.
append
(
MulTestCase
(
a
=
a
,
shape_a
=
shape
,
stride_a
=
stride_a
,
b
=
b
,
shape_b
=
shape
,
stride_b
=
stride_b
,
c
=
c
,
shape_c
=
shape
,
stride_c
=
stride_c
,
)
)
test_writer
.
add_tests
(
test_cases
)
test_writer
.
save
()
test/infiniop-test/test_generate/testcases/swiglu.py
0 → 100644
View file @
9b32b4b1
import
numpy
as
np
import
gguf
from
typing
import
List
from
..
import
InfiniopTestWriter
,
InfiniopTestCase
,
np_dtype_to_ggml
,
gguf_strides
,
contiguous_gguf_strides
def
swiglu
(
a
:
np
.
ndarray
,
b
:
np
.
ndarray
,
):
c
=
a
*
b
/
(
1.0
+
np
.
exp
(
-
b
))
return
c
class
SwiGLUTestCase
(
InfiniopTestCase
):
def
__init__
(
self
,
a
:
np
.
ndarray
,
shape_a
:
List
[
int
]
|
None
,
stride_a
:
List
[
int
]
|
None
,
b
:
np
.
ndarray
,
shape_b
:
List
[
int
]
|
None
,
stride_b
:
List
[
int
]
|
None
,
c
:
np
.
ndarray
,
shape_c
:
List
[
int
]
|
None
,
stride_c
:
List
[
int
]
|
None
,
):
super
().
__init__
(
"swiglu"
)
self
.
a
=
a
self
.
shape_a
=
shape_a
self
.
stride_a
=
stride_a
self
.
b
=
b
self
.
shape_b
=
shape_b
self
.
stride_b
=
stride_b
self
.
c
=
c
self
.
shape_c
=
shape_c
self
.
stride_c
=
stride_c
def
write_test
(
self
,
test_writer
:
"InfiniopTestWriter"
):
super
().
write_test
(
test_writer
)
if
self
.
shape_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.shape"
),
self
.
shape_a
)
if
self
.
shape_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.shape"
),
self
.
shape_b
)
if
self
.
shape_c
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.shape"
),
self
.
shape_c
)
if
self
.
stride_a
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"a.strides"
),
gguf_strides
(
*
self
.
stride_a
))
if
self
.
stride_b
is
not
None
:
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"b.strides"
),
gguf_strides
(
*
self
.
stride_b
))
test_writer
.
add_array
(
test_writer
.
gguf_key
(
"c.strides"
),
gguf_strides
(
*
self
.
stride_c
if
self
.
stride_c
is
not
None
else
contiguous_gguf_strides
(
self
.
shape_c
))
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"a"
),
self
.
a
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
a
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"b"
),
self
.
b
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
b
.
dtype
)
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"c"
),
self
.
c
,
raw_dtype
=
np_dtype_to_ggml
(
self
.
c
.
dtype
)
)
ans
=
swiglu
(
self
.
a
.
astype
(
np
.
float64
),
self
.
b
.
astype
(
np
.
float64
),
)
test_writer
.
add_tensor
(
test_writer
.
gguf_key
(
"ans"
),
ans
,
raw_dtype
=
gguf
.
GGMLQuantizationType
.
F64
)
if
__name__
==
"__main__"
:
test_writer
=
InfiniopTestWriter
(
"swiglu.gguf"
)
test_cases
=
[]
_TEST_CASES_
=
[
((
64
,
128
),
None
,
None
,
None
),
((
64
,
121
),
None
,
None
,
None
),
((
15
,
512
),
None
,
None
,
None
),
((
13
,
4
),
None
,
None
,
None
),
((
13
,
4
),
(
10
,
1
),
(
10
,
1
),
(
10
,
1
)),
((
13
,
4
,
4
),
None
,
None
,
None
),
((
13
,
4
,
4
),
(
20
,
4
,
1
),
(
20
,
4
,
1
),
(
20
,
4
,
1
)),
((
16
,
5632
),
None
,
None
,
None
),
((
16
,
5632
),
(
13312
,
1
),
(
13312
,
1
),
(
13312
,
1
)),
((
16
,
5632
),
(
5632
,
1
),
(
5632
,
1
),
(
1
,
16
)),
((
2
,
3
,
400
),
(
1200
,
400
,
1
),
(
1200
,
400
,
1
),
(
1
,
2
,
6
)),
((
4
,
4
,
5632
),
None
,
None
,
None
),
((
4
,
4
,
5632
),
(
45056
,
5632
,
1
),
(
45056
,
5632
,
1
),
(
45056
,
5632
,
1
)),
]
_TENSOR_DTYPES_
=
[
np
.
float32
,
np
.
float16
]
for
dtype
in
_TENSOR_DTYPES_
:
for
shape
,
stride_a
,
stride_b
,
stride_c
in
_TEST_CASES_
:
a
=
np
.
random
.
rand
(
*
shape
).
astype
(
dtype
)
b
=
np
.
random
.
rand
(
*
shape
).
astype
(
dtype
)
c
=
np
.
empty
(
tuple
(
0
for
_
in
shape
),
dtype
=
dtype
)
test_case
=
SwiGLUTestCase
(
a
=
a
,
shape_a
=
list
(
shape
),
stride_a
=
stride_a
,
b
=
b
,
shape_b
=
list
(
shape
),
stride_b
=
stride_b
,
c
=
c
,
shape_c
=
list
(
shape
),
stride_c
=
stride_c
,
)
test_cases
.
append
(
test_case
)
test_writer
.
add_tests
(
test_cases
)
test_writer
.
save
()
test/infiniop/attention.py
View file @
9b32b4b1
from
ctypes
import
POINTER
,
Structure
,
c_int32
,
c_uint64
,
c_void_p
,
c_float
,
c_bool
from
ctypes
import
POINTER
,
Structure
,
c_int32
,
c_uint64
,
c_void_p
import
ctypes
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
abspath
(
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
,
".."
)))
from
operatorspy
import
(
from
libinfiniop
import
(
open_lib
,
to_tensor
,
CTensor
,
DeviceEnum
,
infiniopHandle_t
,
infiniopTensorDescriptor_t
,
create_handle
,
destroy_handle
,
check_error
,
rearrange_tensor
,
create_workspace
,
get_args
,
get_test_devices
,
test_operator
,
debug
,
get_tolerance
,
profile_operation
,
)
from
operatorspy.tests.test_utils
import
get_args
import
torch
import
torch.nn.functional
as
F
class
AttentionDescriptor
(
Structure
):
...
...
@@ -95,12 +95,13 @@ def test(
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
=
torch
.
float16
,
q_stride
=
None
,
k_stride
=
None
,
v_stride
=
None
,
k_cache_stride
=
None
,
v_cache_stride
=
None
,
dtype
=
torch
.
float16
,
sync
=
None
,
):
print
(
f
"Testing Attention on
{
torch_device
}
with n_q_head:
{
n_q_head
}
n_kv_head:
{
n_kv_head
}
seq_len:
{
seq_len
}
head_dim:
{
head_dim
}
pos:
{
pos
}
"
...
...
@@ -140,6 +141,9 @@ def test(
k_cache_tensor
=
to_tensor
(
k_cache
,
lib
)
v_cache_tensor
=
to_tensor
(
v_cache
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopAttentionDescriptor_t
()
check_error
(
lib
.
infiniopCreateAttentionDescriptor
(
...
...
@@ -156,12 +160,15 @@ def test(
)
# Invalidate the shape and strides in the descriptor to prevent them from being directly used by the kernel
out_tensor
.
descriptor
.
contents
.
invalidate
()
q_tensor
.
descriptor
.
contents
.
invalidate
()
k_tensor
.
descriptor
.
contents
.
invalidate
()
v_tensor
.
descriptor
.
contents
.
invalidate
()
k_cache_tensor
.
descriptor
.
contents
.
invalidate
()
v_cache_tensor
.
descriptor
.
contents
.
invalidate
()
for
tensor
in
[
out_tensor
,
q_tensor
,
k_tensor
,
v_tensor
,
k_cache_tensor
,
v_cache_tensor
,
]:
tensor
.
destroyDesc
(
lib
)
workspace_size
=
c_uint64
(
0
)
check_error
(
...
...
@@ -169,152 +176,52 @@ def test(
)
workspace
=
create_workspace
(
workspace_size
.
value
,
out
.
device
)
check_error
(
lib
.
infiniopAttention
(
descriptor
,
workspace
.
data_ptr
()
if
workspace
is
not
None
else
None
,
workspace_size
.
value
,
out_tensor
.
data
,
q_tensor
.
data
,
k_tensor
.
data
,
v_tensor
.
data
,
k_cache_tensor
.
data
,
v_cache_tensor
.
data
,
None
,
)
)
assert
torch
.
allclose
(
out
,
ans
,
atol
=
1e-4
,
rtol
=
1e-2
)
check_error
(
lib
.
infiniopDestroyAttentionDescriptor
(
descriptor
))
def
test_cpu
(
lib
,
test_cases
):
device
=
DeviceEnum
.
DEVICE_CPU
handle
=
create_handle
(
lib
,
device
)
for
(
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
)
in
test_cases
:
test
(
lib
,
handle
,
"cpu"
,
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
def
lib_attention
():
check_error
(
lib
.
infiniopAttention
(
descriptor
,
workspace
.
data_ptr
()
if
workspace
is
not
None
else
None
,
workspace_size
.
value
,
out_tensor
.
data
,
q_tensor
.
data
,
k_tensor
.
data
,
v_tensor
.
data
,
k_cache_tensor
.
data
,
v_cache_tensor
.
data
,
None
,
)
)
destroy_handle
(
lib
,
handle
)
def
test_cuda
(
lib
,
test_cases
):
device
=
DeviceEnum
.
DEVICE_CUDA
handle
=
create_handle
(
lib
,
device
)
for
(
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
)
in
test_cases
:
test
(
lib
,
handle
,
"cuda"
,
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
)
lib_attention
()
destroy_handle
(
lib
,
handle
)
def
test_bang
(
lib
,
test_cases
):
import
torch_mlu
device
=
DeviceEnum
.
DEVICE_BANG
handle
=
create_handle
(
lib
,
device
)
for
(
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
)
in
test_cases
:
test
(
lib
,
handle
,
"mlu"
,
n_q_head
,
n_kv_head
,
seq_len
,
head_dim
,
pos
,
k_cache_buf_len
,
v_cache_buf_len
,
dtype
,
q_stride
,
k_stride
,
v_stride
,
k_cache_stride
,
v_cache_stride
,
)
# Validate results
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
debug
(
out
,
ans
,
atol
=
atol
,
rtol
=
rtol
)
assert
torch
.
allclose
(
out
,
ans
,
atol
=
atol
,
rtol
=
rtol
)
destroy_handle
(
lib
,
handle
)
# Profiling workflow
if
PROFILE
:
# fmt: off
profile_operation
(
"PyTorch"
,
lambda
:
attention
(
q
,
k
,
v
,
k_cache
,
v_cache
,
pos
),
torch_device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
profile_operation
(
" lib"
,
lambda
:
lib_attention
(),
torch_device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
# fmt: on
check_error
(
lib
.
infiniopDestroyAttentionDescriptor
(
descriptor
))
if
__name__
==
"__main__"
:
_TENSOR_DTYPES
=
[
torch
.
float16
,
torch
.
float32
]
# Tolerance map for different data types
_TOLERANCE_MAP
=
{
torch
.
float16
:
{
"atol"
:
1e-4
,
"rtol"
:
1e-2
},
torch
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-3
},
}
DEBUG
=
False
PROFILE
=
False
NUM_PRERUN
=
10
NUM_ITERATIONS
=
1000
test_cases
=
[
# prefill
(
...
...
@@ -325,7 +232,6 @@ if __name__ == "__main__":
0
,
# pos
2048
,
# k_cache_buf_len
2048
,
# v_cache_buf_len
torch
.
float16
,
# dtype
[
64
,
2560
,
1
],
# q_stride
[
64
,
2560
,
1
],
# k_stride
[
64
,
2560
,
1
],
# v_stride
...
...
@@ -341,7 +247,6 @@ if __name__ == "__main__":
3
,
# pos
2048
,
# k_cache_buf_len
2048
,
# v_cache_buf_len
torch
.
float16
,
# dtype
[
64
,
2560
,
1
],
# q_stride
[
64
,
2560
,
1
],
# k_stride
[
64
,
2560
,
1
],
# v_stride
...
...
@@ -357,13 +262,26 @@ if __name__ == "__main__":
1
,
# pos
8
,
# k_cache_buf_len
8
,
# v_cache_buf_len
torch
.
float16
,
# dtype
None
,
# q_stride
None
,
# k_stride
None
,
# v_stride
None
,
# k_cache_stride
None
,
# v_cache_stride
),
(
28
,
# n_q_head
28
,
# n_kv_head
15
,
# seq_len
128
,
# head_dim
0
,
# pos
2048
,
# k_cache_buf_len
2048
,
# v_cache_buf_len
[
128
,
10752
,
1
],
# q_stride
[
128
,
10752
,
1
],
# k_stride
[
128
,
10752
,
1
],
# v_stride
[
128
,
3584
,
1
],
# k_cache_stride
[
128
,
3584
,
1
],
# v_cache_stride
),
]
args
=
get_args
()
lib
=
open_lib
()
...
...
@@ -406,12 +324,13 @@ if __name__ == "__main__":
infiniopAttentionDescriptor_t
,
]
if
args
.
cpu
:
test_cpu
(
lib
,
test_cases
)
if
args
.
cuda
:
test_cuda
(
lib
,
test_cases
)
if
args
.
bang
:
test_bang
(
lib
,
test_cases
)
if
not
(
args
.
cpu
or
args
.
cuda
or
args
.
bang
):
test_cpu
(
lib
,
test_cases
)
# Configure testing options
DEBUG
=
args
.
debug
PROFILE
=
args
.
profile
NUM_PRERUN
=
args
.
num_prerun
NUM_ITERATIONS
=
args
.
num_iterations
# Execute tests
for
device
in
get_test_devices
(
args
):
test_operator
(
lib
,
device
,
test
,
test_cases
,
_TENSOR_DTYPES
)
print
(
"
\033
[92mTest passed!
\033
[0m"
)
test/infiniop/avg_pool.py
View file @
9b32b4b1
...
...
@@ -88,6 +88,7 @@ def test(
padding
,
strides
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing AvgPool on
{
torch_device
}
with x_shape:
{
x_shape
}
kernel_shape:
{
k_shape
}
padding:
{
padding
}
strides:
{
strides
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -109,6 +110,10 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopAvgPoolDescriptor_t
()
check_error
(
...
...
test/infiniop/causal_softmax.py
View file @
9b32b4b1
...
...
@@ -37,7 +37,7 @@ _TENSOR_DTYPES = [torch.float16]
# Tolerance map for different data types
_TOLERANCE_MAP
=
{
torch
.
float16
:
{
"atol"
:
0
,
"rtol"
:
1e-2
},
torch
.
float16
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-2
},
}
...
...
@@ -87,6 +87,7 @@ def test(
y_stride
=
None
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing CausalSoftmax on
{
torch_device
}
with shape:
{
shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
dtype
}
inplace:
{
inplace
}
"
...
...
@@ -107,6 +108,9 @@ def test(
y
=
torch
.
zeros
(
shape
,
dtype
=
dtype
).
to
(
torch_device
)
y
=
rearrange_if_needed
(
y
,
y_stride
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopCausalSoftmaxDescriptor_t
()
check_error
(
...
...
@@ -139,6 +143,9 @@ def test(
)
lib_causal_softmax
()
if
sync
is
not
None
:
sync
()
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
...
...
test/infiniop/clip.py
0 → 100644
View file @
9b32b4b1
#!/usr/bin/env python3
import
torch
import
ctypes
from
ctypes
import
POINTER
,
Structure
,
c_int32
,
c_size_t
,
c_uint64
,
c_void_p
,
c_float
from
libinfiniop
import
(
infiniopHandle_t
,
infiniopTensorDescriptor_t
,
open_lib
,
to_tensor
,
get_test_devices
,
check_error
,
rearrange_if_needed
,
create_workspace
,
test_operator
,
get_args
,
debug
,
get_tolerance
,
profile_operation
,
)
from
enum
import
Enum
,
auto
# ==============================================================================
#  Configuration (Internal Use Only)
# ==============================================================================
# These are not meant to be imported from other modules
_TEST_CASES_ = [
    # shape, x_stride, y_stride, min_val, max_val
    # basic shape tests
    ((10,), None, None, -1.0, 1.0),
    ((5, 10), None, None, -1.0, 1.0),
    ((2, 3, 4), None, None, -1.0, 1.0),
    # different min_val / max_val combinations
    ((10,), None, None, 0.0, 2.0),
    ((5, 10), None, None, 0.0, 2.0),
    ((2, 3, 4), None, None, 0.0, 2.0),
    ((10,), None, None, -2.0, 0.0),
    ((5, 10), None, None, -2.0, 0.0),
    ((2, 3, 4), None, None, -2.0, 0.0),
    # unusual shape tests
    ((7, 13), None, None, -1.0, 1.0),  # prime dimensions
    ((3, 5, 7), None, None, -1.0, 1.0),  # 3-D primes
    # non-standard shape tests
    ((1, 1), None, None, -1.0, 1.0),  # minimal shape
    ((100, 100), None, None, -1.0, 1.0),  # large shape
    ((16, 16, 16), None, None, -1.0, 1.0),  # large 3-D
    # extreme value tests
    ((10,), None, None, -1000.0, 1000.0),  # wide range
    ((10,), None, None, -0.001, 0.001),  # narrow range
    ((10,), None, None, 0.0, 0.0),  # min == max
    # special shape tests
    ((0,), None, None, -1.0, 1.0),  # empty tensor
    ((1, 0), None, None, -1.0, 1.0),  # empty dimension
]
# Data types every test case is run with.
_TENSOR_DTYPES = [torch.float16, torch.float32]

# Per-dtype comparison tolerances, resolved through get_tolerance().
_TOLERANCE_MAP = {
    torch.float16: {"atol": 1e-3, "rtol": 1e-3},
    torch.float32: {"atol": 1e-7, "rtol": 1e-6},
}
class Inplace(Enum):
    """Whether the Clip output is written to a separate tensor or back into x."""

    # Output goes to a freshly-allocated tensor.
    OUT_OF_PLACE = auto()
    # Output overwrites the input tensor x.
    INPLACE_X = auto()
# Every base case is exercised both in-place and out-of-place.
_INPLACE = [
    Inplace.INPLACE_X,
    Inplace.OUT_OF_PLACE,
]

# Cross-product of the base cases with the in-place variants.
_TEST_CASES = [
    test_case + (inplace_item,)
    for test_case in _TEST_CASES_
    for inplace_item in _INPLACE
]

# Runtime options; overwritten from the CLI arguments in __main__.
DEBUG = False
PROFILE = False
NUM_PRERUN = 10
NUM_ITERATIONS = 1000
class ClipDescriptor(Structure):
    """ctypes mirror of the library's opaque Clip descriptor header."""

    # NOTE(review): only the leading device fields are mirrored here; the
    # descriptor is treated as opaque and handled through a pointer only.
    _fields_ = [("device_type", c_int32), ("device_id", c_int32)]


# Pointer type passed to the infiniopClip* C entry points.
infiniopClipDescriptor_t = POINTER(ClipDescriptor)
def clip(x, min_val, max_val):
    """Reference implementation: clamp each element of x into [min_val, max_val]."""
    return x.clamp(min_val, max_val)
def create_tensor_with_stride(shape, stride, dtype, device):
    """Create a random tensor with values in [-2, 2] and the requested layout.

    Only 2-D stride requests are honoured:
      * (shape[1], 1) -> natural row-major contiguous layout
      * (1, shape[0]) -> column-major storage (transposed memory layout)
      * any other 2-D stride -> contiguous copy with the same values
    For non-2-D shapes/strides the stride request is silently ignored and the
    (already contiguous) random tensor is returned as-is.

    Args:
        shape: tensor shape.
        stride: desired stride tuple, or None for the default layout.
        dtype: torch dtype of the tensor.
        device: torch device the tensor is allocated on.

    Returns:
        A torch.Tensor of the given shape, dtype and device.
    """
    x = torch.rand(shape, dtype=dtype, device=device) * 4.0 - 2.0  # range [-2, 2]
    if stride is None:
        return x
    if len(shape) == 2 and len(stride) == 2:
        if stride == (shape[1], 1):
            # Requested stride is the natural row-major layout.
            return x.contiguous()
        elif stride == (1, shape[0]):
            # Column-major layout: round-trip through a transpose so the
            # returned view has stride (1, shape[0]).
            return x.transpose(0, 1).contiguous().transpose(0, 1)
        else:
            # Arbitrary strides are unsupported; return an equal-valued
            # contiguous copy (replaces the original O(n^2) Python
            # element-by-element copy loop).
            return x.clone().contiguous()
    return x
def test(
    lib,
    handle,
    torch_device,
    shape,
    x_stride=None,
    y_stride=None,
    min_val=-1.0,
    max_val=1.0,
    inplace=Inplace.OUT_OF_PLACE,
    dtype=torch.float32,
    sync=None,
):
    """Run one Clip test case against the infiniop library.

    Computes the reference result with torch.clamp, executes the library's
    Clip operator on the same data, and asserts the two agree within the
    dtype-specific tolerance.

    Args:
        lib: ctypes handle to the loaded infiniop shared library.
        handle: infiniop device handle used when creating descriptors.
        torch_device: torch device string the tensors live on.
        shape: shape of the input/output tensors.
        x_stride: optional stride request for the input tensor.
        y_stride: optional stride request for the output tensor.
        min_val: lower clipping bound.
        max_val: upper clipping bound.
        inplace: whether the output reuses the input tensor.
        dtype: torch dtype of the tensors.
        sync: optional device synchronization callable (None on CPU).
            Added for consistency with the other operator tests:
            test_operator passes get_sync_func(device) as the last
            positional argument, so this parameter is required.
    """
    print(
        f"Testing Clip on {torch_device} with shape:{shape} x_stride:{x_stride} y_stride:{y_stride} "
        f"min_val:{min_val} max_val:{max_val} dtype:{dtype} inplace:{inplace}"
    )

    x = create_tensor_with_stride(shape, x_stride, dtype, torch_device)
    # Reference answer is taken before the library may mutate x in-place.
    ans = clip(x, min_val, max_val)

    x = rearrange_if_needed(x, x_stride)
    x_tensor = to_tensor(x, lib)

    if inplace == Inplace.INPLACE_X:
        y = x
        y_tensor = x_tensor
    else:
        y = torch.zeros(shape, dtype=dtype).to(torch_device)
        y = rearrange_if_needed(y, y_stride)
        y_tensor = to_tensor(y, lib)

    if sync is not None:
        sync()

    descriptor = infiniopClipDescriptor_t()
    check_error(
        lib.infiniopCreateClipDescriptor(
            handle,
            ctypes.byref(descriptor),
            y_tensor.descriptor,
            x_tensor.descriptor,
        )
    )

    workspace_size = c_uint64(0)
    check_error(
        lib.infiniopGetClipWorkspaceSize(descriptor, ctypes.byref(workspace_size))
    )
    workspace = create_workspace(workspace_size.value, x.device)

    def lib_clip():
        check_error(
            lib.infiniopClip(
                descriptor,
                workspace.data_ptr() if workspace is not None else None,
                workspace_size.value,
                y_tensor.data,
                x_tensor.data,
                c_float(min_val),
                c_float(max_val),
                None,
            )
        )

    lib_clip()

    if sync is not None:
        sync()

    # Now we can destroy the tensor descriptors
    x_tensor.destroyDesc(lib)
    if inplace != Inplace.INPLACE_X:
        y_tensor.destroyDesc(lib)

    atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
    if DEBUG or not torch.allclose(y, ans, atol=atol, rtol=rtol):
        print("\nExpected:")
        print(ans)
        print("\nActual:")
        print(y)
        print("\nDifference:")
        print(torch.abs(y - ans))
        # Guard: torch.max raises on empty tensors (the (0,) test case).
        if y.numel() > 0:
            print("\nMax difference:", torch.max(torch.abs(y - ans)).item())
        debug(y, ans, atol=atol, rtol=rtol)
    assert torch.allclose(y, ans, atol=atol, rtol=rtol)

    # Profiling workflow
    if PROFILE:
        # fmt: off
        profile_operation("PyTorch", lambda: clip(x, min_val, max_val), torch_device, NUM_PRERUN, NUM_ITERATIONS)
        profile_operation(" lib", lambda: lib_clip(), torch_device, NUM_PRERUN, NUM_ITERATIONS)
        # fmt: on

    check_error(lib.infiniopDestroyClipDescriptor(descriptor))
if __name__ == "__main__":
    args = get_args()
    lib = open_lib()

    # Declare ctypes signatures for the Clip entry points so argument
    # marshalling is type-checked instead of defaulting to int.
    lib.infiniopCreateClipDescriptor.restype = c_int32
    lib.infiniopCreateClipDescriptor.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopClipDescriptor_t),
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
    ]
    lib.infiniopGetClipWorkspaceSize.restype = c_int32
    lib.infiniopGetClipWorkspaceSize.argtypes = [
        infiniopClipDescriptor_t,
        POINTER(c_uint64),
    ]
    # infiniopClip(desc, workspace, workspace_size, y, x, min, max, stream)
    lib.infiniopClip.restype = c_int32
    lib.infiniopClip.argtypes = [
        infiniopClipDescriptor_t,
        c_void_p,
        c_uint64,
        c_void_p,
        c_void_p,
        c_float,
        c_float,
        c_void_p,
    ]
    lib.infiniopDestroyClipDescriptor.restype = c_int32
    lib.infiniopDestroyClipDescriptor.argtypes = [
        infiniopClipDescriptor_t,
    ]

    # Configure testing options
    DEBUG = args.debug
    PROFILE = args.profile
    NUM_PRERUN = args.num_prerun
    NUM_ITERATIONS = args.num_iterations

    # Run every test case on every requested device.
    for device in get_test_devices(args):
        test_operator(lib, device, test, _TEST_CASES, _TENSOR_DTYPES)

    print("\033[92mTest passed!\033[0m")
test/infiniop/conv.py
View file @
9b32b4b1
...
...
@@ -95,6 +95,7 @@ def test(
dilations
,
tensor_stride
=
None
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
assert
len
(
pads
)
==
len
(
strides
)
==
len
(
dilations
)
print
(
...
...
@@ -118,8 +119,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
w_tensor
=
to_tensor
(
w
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopConvDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopConvDescriptor_t
()
check_error
(
lib
.
infiniopCreateConvDescriptor
(
handle
,
...
...
test/infiniop/expand.py
View file @
9b32b4b1
...
...
@@ -52,6 +52,7 @@ def test(
y_stride
=
None
,
x_stride
=
None
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Expand on
{
torch_device
}
with x_shape:
{
x_shape
}
y_shape:
{
y_shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -76,8 +77,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopExpandDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopExpandDescriptor_t
()
check_error
(
lib
.
infiniopCreateExpandDescriptor
(
handle
,
...
...
test/infiniop/gemm.py
View file @
9b32b4b1
...
...
@@ -83,6 +83,7 @@ def test(
b_stride
=
None
,
c_stride
=
None
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Gemm on
{
torch_device
}
with alpha:
{
alpha
}
, beta:
{
beta
}
,"
...
...
@@ -104,6 +105,9 @@ def test(
]
a_tensor
,
b_tensor
,
c_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
a
,
b
,
c
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopGemmDescriptor_t
()
check_error
(
lib
.
infiniopCreateGemmDescriptor
(
...
...
test/infiniop/global_avg_pool.py
View file @
9b32b4b1
...
...
@@ -51,6 +51,7 @@ def test(
torch_device
,
x_shape
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing GlobalAvgPool on
{
torch_device
}
with input tensor_shape:
{
x_shape
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -70,8 +71,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopGlobalAvgPoolDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopGlobalAvgPoolDescriptor_t
()
check_error
(
lib
.
infiniopCreateGlobalAvgPoolDescriptor
(
handle
,
...
...
test/infiniop/libinfiniop/utils.py
View file @
9b32b4b1
...
...
@@ -423,6 +423,7 @@ def test_operator(lib, device, test_func, test_cases, tensor_dtypes):
infiniDeviceEnum_str_map
[
device
],
*
test_case
,
tensor_dtype
,
get_sync_func
(
device
)
)
finally
:
destroy_handle
(
lib
,
handle
)
...
...
@@ -471,3 +472,15 @@ def get_test_devices(args):
devices_to_test
=
[
InfiniDeviceEnum
.
CPU
]
return
devices_to_test
def
get_sync_func
(
device
):
import
torch
device_str
=
infiniDeviceEnum_str_map
[
device
]
if
device
==
InfiniDeviceEnum
.
CPU
:
sync
=
None
else
:
sync
=
getattr
(
torch
,
device_str
).
synchronize
return
sync
test/infiniop/max_pool.py
View file @
9b32b4b1
...
...
@@ -83,6 +83,7 @@ def test(
padding
,
strides
,
tensor_dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing MaxPool on
{
torch_device
}
with x_shape:
{
x_shape
}
kernel_shape:
{
k_shape
}
padding:
{
padding
}
strides:
{
strides
}
dtype:
{
tensor_dtype
}
"
...
...
@@ -104,8 +105,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
descriptor
=
infiniopMaxPoolDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopMaxPoolDescriptor_t
()
check_error
(
lib
.
infiniopCreateMaxPoolDescriptor
(
handle
,
...
...
test/infiniop/mlp.py
View file @
9b32b4b1
...
...
@@ -65,6 +65,7 @@ def test(
y_stride
=
None
,
w12_stride
=
None
,
w3_stride
=
None
,
sync
=
None
):
print
(
f
"Testing MLP on
{
torch_device
}
with num_tokens:
{
num_tokens
}
hidden_size:
{
hidden_size
}
intermediate_size:
{
intermediate_size
}
"
...
...
@@ -97,6 +98,10 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
w12_tensor
=
to_tensor
(
w12
,
lib
)
w3_tensor
=
to_tensor
(
w3
,
lib
)
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopMLPDescriptor_t
()
check_error
(
lib
.
infiniopCreateMLPDescriptor
(
...
...
test/infiniop/random_sample.py
View file @
9b32b4b1
...
...
@@ -103,6 +103,7 @@ def test(
topk
,
temperature
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing RandomSample on
{
torch_device
}
with voc:
{
voc
}
random_val:
{
random_val
}
topp:
{
topp
}
topk:
{
topk
}
temperature:
{
temperature
}
dtype:
{
dtype
}
"
...
...
@@ -122,6 +123,9 @@ def test(
indices_tensor
.
descriptor
.
contents
.
dt
=
InfiniDtype
.
U64
# treat int64 as uint64
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRandomSampleDescriptor_t
()
check_error
(
lib
.
infiniopCreateRandomSampleDescriptor
(
...
...
test/infiniop/rearrange.py
View file @
9b32b4b1
...
...
@@ -131,6 +131,7 @@ def test(
x_stride
,
y_stride
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing Rerrange on
{
torch_device
}
with shape:
{
shape
}
x_stride:
{
x_stride
}
y_stride:
{
y_stride
}
dtype:
{
dtype
}
"
...
...
@@ -145,6 +146,9 @@ def test(
]
x_tensor
,
y_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
x
,
y
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRearrangeDescriptor_t
()
check_error
(
...
...
test/infiniop/relu.py
View file @
9b32b4b1
...
...
@@ -55,6 +55,7 @@ def test(
tensor_shape
,
tensor_dtype
=
torch
.
float16
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
sync
=
None
):
print
(
f
"Testing Relu on
{
torch_device
}
with tensor_shape:
{
tensor_shape
}
dtype:
{
tensor_dtype
}
inplace:
{
inplace
.
name
}
"
...
...
@@ -78,8 +79,11 @@ def test(
x_tensor
=
to_tensor
(
x
,
lib
)
y_tensor
=
to_tensor
(
y
,
lib
)
if
inplace
==
Inplace
.
OUT_OF_PLACE
else
x_tensor
descriptor
=
infiniopReluDescriptor_t
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopReluDescriptor_t
()
check_error
(
lib
.
infiniopCreateReluDescriptor
(
handle
,
...
...
test/infiniop/rms_norm.py
View file @
9b32b4b1
...
...
@@ -72,6 +72,7 @@ def test(
x_stride
,
w_dtype
=
torch
.
float16
,
dtype
=
torch
.
float16
,
sync
=
None
):
print
(
f
"Testing RMS_Norm on
{
torch_device
}
with y_shape:
{
y_shape
}
x_shape:
{
x_shape
}
w_shape:
{
w_shape
}
"
...
...
@@ -89,9 +90,11 @@ def test(
rearrange_if_needed
(
tensor
,
stride
)
for
tensor
,
stride
in
zip
([
x
,
y
],
[
x_stride
,
y_stride
])
]
x_tensor
,
y_tensor
,
w_tensor
=
[
to_tensor
(
tensor
,
lib
)
for
tensor
in
[
x
,
y
,
w
]]
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopRMSNormDescriptor_t
()
check_error
(
...
...
test/infiniop/rope.py
View file @
9b32b4b1
...
...
@@ -117,6 +117,7 @@ def test(
y_strides
=
None
,
inplace
=
Inplace
.
OUT_OF_PLACE
,
dtype
=
torch
.
float32
,
sync
=
None
):
if
inplace
==
Inplace
.
INPLACE_X
:
y_strides
=
x_strides
...
...
@@ -147,8 +148,8 @@ def test(
else
:
y_tensor
=
to_tensor
(
y
,
lib
)
if
torch_device
==
"npu"
:
sync
hronize_device
(
torch_device
)
if
sync
is
not
None
:
sync
(
)
check_error
(
lib
.
infiniopCreateRoPEDescriptor
(
...
...
@@ -188,6 +189,9 @@ def test(
)
lib_rope
()
if
sync
is
not
None
:
sync
()
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
if
DEBUG
:
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment