Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
8d09630a
Unverified
Commit
8d09630a
authored
Feb 11, 2026
by
gongchensu
Committed by
GitHub
Feb 11, 2026
Browse files
Merge branch 'demo131' into Issue/862
parents
ab52dead
012df56c
Changes
387
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1504 additions
and
710 deletions
+1504
-710
test/infinicore/nn/embedding.py
test/infinicore/nn/embedding.py
+12
-12
test/infinicore/nn/linear.py
test/infinicore/nn/linear.py
+7
-2
test/infinicore/nn/rmsnorm.py
test/infinicore/nn/rmsnorm.py
+7
-2
test/infinicore/nn/rope.py
test/infinicore/nn/rope.py
+7
-2
test/infinicore/ops/adaptive_max_pool2d.py
test/infinicore/ops/adaptive_max_pool2d.py
+2
-1
test/infinicore/ops/add_rms_norm.py
test/infinicore/ops/add_rms_norm.py
+195
-0
test/infinicore/ops/embedding.py
test/infinicore/ops/embedding.py
+4
-18
test/infinicore/ops/flash_attention.py
test/infinicore/ops/flash_attention.py
+115
-0
test/infinicore/ops/kv_caching.py
test/infinicore/ops/kv_caching.py
+134
-0
test/infinicore/ops/paged_attention.py
test/infinicore/ops/paged_attention.py
+200
-0
test/infinicore/ops/paged_attention_prefill.py
test/infinicore/ops/paged_attention_prefill.py
+248
-0
test/infinicore/ops/paged_caching.py
test/infinicore/ops/paged_caching.py
+181
-0
test/infinicore/ops/random_sample.py
test/infinicore/ops/random_sample.py
+6
-6
test/infinicore/ops/silu_and_mul.py
test/infinicore/ops/silu_and_mul.py
+126
-0
test/infinicore/ops/sort.py
test/infinicore/ops/sort.py
+3
-2
test/infinicore/ops/std.py
test/infinicore/ops/std.py
+3
-2
test/infinicore/run.py
test/infinicore/run.py
+227
-654
test/infinicore/tensor/narrow.py
test/infinicore/tensor/narrow.py
+9
-3
test/infinicore/tensor/squeeze.py
test/infinicore/tensor/squeeze.py
+9
-3
test/infinicore/tensor/unsqueeze.py
test/infinicore/tensor/unsqueeze.py
+9
-3
No files found.
test/infinicore/nn/embedding.py
View file @
8d09630a
...
...
@@ -4,10 +4,15 @@ import sys
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework.tensor
import
TensorInitializer
from
framework.utils
import
convert_infinicore_to_torch
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorInitializer
,
TensorSpec
,
TestCase
,
convert_infinicore_to_torch
,
)
import
infinicore
...
...
@@ -109,14 +114,9 @@ class OpTest(BaseOperatorTest):
def
infinicore_operator
(
self
,
x
,
weight
):
"""InfiniCore nn.Embedding implementation"""
if
x
.
device
.
type
!=
"cpu"
:
# 将 input的数据 转移到 cpu 上
x_torch
=
convert_infinicore_to_torch
(
x
)
x_torch_cpu
=
x_torch
.
contiguous
().
cpu
()
x
=
infinicore
.
from_torch
(
x_torch_cpu
)
# Note: embedding now supports device-side input for graph recording
# No need to convert to CPU anymore - the implementation handles both CPU and device inputs
num_embeddings
,
embedding_dim
=
weight
.
shape
model
=
infinicore
.
nn
.
Embedding
(
...
...
test/infinicore/nn/linear.py
View file @
8d09630a
...
...
@@ -4,8 +4,13 @@ import sys
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorSpec
,
TestCase
)
import
infinicore
...
...
test/infinicore/nn/rmsnorm.py
View file @
8d09630a
...
...
@@ -4,8 +4,13 @@ import sys
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorSpec
,
TestCase
)
import
infinicore
...
...
test/infinicore/nn/rope.py
View file @
8d09630a
...
...
@@ -4,8 +4,13 @@ import sys
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorSpec
,
TestCase
)
from
infinicore.nn.functional
import
RopeAlgo
import
infinicore
...
...
test/infinicore/ops/adaptive_max_pool2d.py
View file @
8d09630a
...
...
@@ -7,6 +7,7 @@ import torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
CaseResult
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
...
...
@@ -76,7 +77,7 @@ class OpTest(BaseOperatorTest):
and
isinstance
(
test_case
.
inputs
[
0
],
TensorSpec
)
and
test_case
.
inputs
[
0
].
strides
is
not
None
):
return
Test
Result
(
return
Case
Result
(
success
=
False
,
return_code
=-
2
,
test_case
=
test_case
,
...
...
test/infinicore/ops/add_rms_norm.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
is_broadcast
,
)
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (y_shape, a_shape, b_shape, w_shape, y_strides, a_strides, b_strides)
_TEST_CASES_DATA
=
[
# Basic cases
((
1
,
4
),
(
1
,
4
),
(
1
,
4
),
(
4
,),
None
,
None
,
None
),
((
2
,
4
),
(
2
,
4
),
(
2
,
4
),
(
4
,),
None
,
None
,
None
),
((
2
,
2
,
4
),
(
2
,
2
,
4
),
(
2
,
2
,
4
),
(
4
,),
None
,
None
,
None
),
# Strided cases
((
2
,
2
,
4
),
(
2
,
2
,
4
),
(
2
,
2
,
4
),
(
4
,),
(
12
,
8
,
1
),
(
12
,
8
,
1
),
(
12
,
8
,
1
)),
# Large tensors
((
16
,
2048
),
(
16
,
2048
),
(
16
,
2048
),
(
2048
,),
None
,
None
,
None
),
((
16
,
2048
),
(
16
,
2048
),
(
16
,
2048
),
(
2048
,),
(
4096
,
1
),
(
4096
,
1
),
(
4096
,
1
)),
((
15
,
3584
),
(
15
,
3584
),
(
15
,
3584
),
(
3584
,),
None
,
None
,
None
),
((
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
2048
,),
None
,
None
,
None
),
(
(
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
2048
,),
(
2048
,
8192
,
1
),
(
2048
,
8192
,
1
),
(
2048
,
8192
,
1
),
),
(
(
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
4
,
4
,
2048
),
(
2048
,),
(
16384
,
4096
,
1
),
(
16384
,
4096
,
1
),
(
16384
,
4096
,
1
),
),
]
# Tolerance configuration
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
2e-3
,
"rtol"
:
2e-3
},
infinicore
.
bfloat16
:
{
"atol"
:
1e-2
,
"rtol"
:
1e-2
},
infinicore
.
float32
:
{
"atol"
:
1e-5
,
"rtol"
:
1e-4
},
}
# Data types for individual tensors
_INPUT_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
]
_WEIGHT_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
# EPSILON constant for AddRMSNorm
_EPSILON
=
1e-5
def parse_test_cases():
    """
    Parse AddRMSNorm test case data and return list of TestCase objects.
    Format: (y_shape, a_shape, b_shape, w_shape, y_strides, a_strides, b_strides)
    """
    test_cases = []
    for data in _TEST_CASES_DATA:
        y_shape = data[0]  # Output shape
        a_shape = data[1]  # First input shape
        b_shape = data[2]  # Second input shape
        w_shape = data[3]  # Weight shape (1D)
        # Optional stride entries; None means contiguous layout.
        y_strides = data[4] if len(data) > 4 else None
        a_strides = data[5] if len(data) > 5 else None
        b_strides = data[6] if len(data) > 6 else None
        # Check if tensors support in-place operations.
        # NOTE(review): these flags are currently unused because the in-place
        # test case below is commented out; kept for when it is re-enabled.
        a_supports_inplace = not is_broadcast(a_strides)
        b_supports_inplace = not is_broadcast(b_strides)
        y_supports_inplace = not is_broadcast(y_strides)
        # Generate test cases for all dtype combinations (input dtype x weight dtype).
        for input_dtype in _INPUT_DTYPES:
            for weight_dtype in _WEIGHT_DTYPES:
                # Use input dtype tolerance for output
                tolerance = _TOLERANCE_MAP.get(
                    input_dtype, {"atol": 1e-5, "rtol": 1e-4}
                )
                # Create typed tensor specs
                a_spec = TensorSpec.from_tensor(a_shape, a_strides, input_dtype)
                b_spec = TensorSpec.from_tensor(b_shape, b_strides, input_dtype)
                # Weight is always contiguous
                w_spec = TensorSpec.from_tensor(w_shape, None, weight_dtype)
                y_spec = TensorSpec.from_tensor(y_shape, y_strides, input_dtype)
                # Test Case 1: Out-of-place (return value) - returns (normalized_result, add_result)
                residual_out_spec = TensorSpec.from_tensor(
                    a_shape, a_strides, input_dtype
                )
                test_cases.append(
                    TestCase(
                        inputs=[a_spec, b_spec, w_spec],
                        kwargs={"epsilon": _EPSILON},
                        output_specs=None,  # Two outputs
                        comparison_target=None,
                        tolerance=tolerance,
                        output_count=2,  # Two outputs: normalized_result and add_result
                        description=f"AddRMSNorm - OUT_OF_PLACE",
                    )
                )
                # Test Case 2: In-place with explicit output tensors (add_rms_norm_(y, residual_out, a, b, w))
                # if y_supports_inplace:
                #     residual_out_spec = TensorSpec.from_tensor(
                #         a_shape, a_strides, input_dtype
                #     )
                #     test_cases.append(
                #         TestCase(
                #             inputs=[a_spec, b_spec, w_spec],
                #             kwargs={
                #                 "epsilon": _EPSILON,
                #                 "out": y_spec,
                #                 "residual": residual_out_spec,
                #             },
                #             output_specs=[y_spec, residual_out_spec],  # Two outputs
                #             comparison_target="out",
                #             tolerance=tolerance,
                #             output_count=2,
                #             description=f"AddRMSNorm - INPLACE(out)",
                #         )
                #     )
    return test_cases
class OpTest(BaseOperatorTest):
    """AddRMSNorm operator test with simplified implementation"""

    def __init__(self):
        super().__init__("AddRMSNorm")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(
        self, a, b, weight, epsilon=_EPSILON, out=None, residual=None, **kwargs
    ):
        """PyTorch AddRMSNorm implementation - returns (normalized_result, add_result)

        Computes add(a, b) followed by RMSNorm of the sum. All arithmetic is
        done in float32 and cast back to a.dtype at the end, matching the
        usual mixed-precision RMSNorm recipe.
        """
        input_dtype = a.dtype
        # Compute add(a, b) in float32 for accuracy.
        sum_tensor = a.to(torch.float32) + b.to(torch.float32)
        weight_fp32 = weight.to(torch.float32)
        # Calculate RMSNorm: (a + b) * weight / sqrt(mean((a+b)^2) + epsilon)
        variance = sum_tensor.pow(2).mean(-1, keepdim=True)
        normalized_result = sum_tensor * torch.rsqrt(variance + epsilon) * weight_fp32
        # Convert back to original dtype.
        normalized_result = normalized_result.to(input_dtype)
        add_result = sum_tensor.to(input_dtype)
        # Optional explicit output tensors (in-place variant support).
        if out is not None:
            out.copy_(normalized_result)
        if residual is not None:
            residual.copy_(add_result)
        return (normalized_result, add_result)

    def infinicore_operator(
        self, a, b, weight, epsilon=_EPSILON, out=None, residual=None, **kwargs
    ):
        """InfiniCore AddRMSNorm implementation - returns (normalized_result, add_result)"""
        return infinicore.add_rms_norm(a, b, weight, epsilon, out=out, residual=residual)
def main():
    """Entry point: run the AddRMSNorm suite and exit with its status code."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/embedding.py
View file @
8d09630a
...
...
@@ -6,7 +6,7 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
from
framework
import
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
from
framework.tensor
import
TensorInitializer
from
framework.utils
import
(
from
framework.utils
.tensor_utils
import
(
convert_infinicore_to_torch
,
infinicore_tensor_from_torch
,
to_torch_dtype
,
...
...
@@ -102,23 +102,9 @@ class OpTest(BaseOperatorTest):
def
infinicore_operator
(
self
,
input
,
weight
,
out
=
None
,
**
kwargs
):
"""InfiniCore Embedding implementation"""
if
input
.
device
.
type
==
"cpu"
:
input_cpu
=
input
else
:
# 将 input的数据 转移到 cpu 上
torch_reference
=
torch
.
zeros
(
input
.
shape
,
dtype
=
to_torch_dtype
(
input
.
dtype
),
device
=
"cpu"
if
"cpu"
==
input
.
device
.
type
else
"cuda"
,
)
torch_reference
=
convert_infinicore_to_torch
(
input
)
torch_reference
=
torch_reference
.
contiguous
().
cpu
()
# 创建cpu的 input
input_cpu
=
infinicore_tensor_from_torch
(
torch_reference
)
return
infinicore
.
nn
.
functional
.
embedding
(
input_cpu
,
weight
,
out
=
out
)
# Note: embedding now supports device-side input for graph recording
# No need to convert to CPU anymore - the implementation handles both CPU and device inputs
return
infinicore
.
nn
.
functional
.
embedding
(
input
,
weight
,
out
=
out
)
def
main
():
...
...
test/infinicore/ops/flash_attention.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TensorInitializer
,
TestCase
,
GenericTestRunner
,
)
# Test cases format: (q_shape, k_shape, v_shape, attn_mask_or_None, dropout_p, is_causal)
# q/k/v typically have shape (..., seq_len, head_dim) or (batch, seq_len, num_heads, head_dim)
_TEST_CASES_DATA
=
[
((
1
,
1
,
2
,
16
),
(
1
,
1
,
8
,
16
),
(
1
,
1
,
8
,
16
),
None
,
0.0
,
False
),
((
1
,
2
,
128
,
16
),
(
1
,
2
,
256
,
16
),
(
1
,
2
,
256
,
16
),
None
,
0.0
,
False
),
((
1
,
1
,
4
,
32
),
(
1
,
1
,
32
,
32
),
(
1
,
1
,
32
,
32
),
None
,
0.0
,
True
),
((
1
,
8
,
256
,
16
),
(
1
,
8
,
512
,
16
),
(
1
,
8
,
512
,
16
),
None
,
0.0
,
True
),
((
1
,
8
,
4
,
16
),
(
1
,
8
,
64
,
16
),
(
1
,
8
,
64
,
16
),
None
,
0.0
,
False
),
((
8
,
28
,
256
,
128
),
(
8
,
28
,
512
,
128
),
(
8
,
28
,
512
,
128
),
None
,
0.0
,
True
),
]
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
1e-2
,
"rtol"
:
1e-2
},
infinicore
.
bfloat16
:
{
"atol"
:
1e-2
,
"rtol"
:
1e-2
},
infinicore
.
float32
:
{
"atol"
:
1e-3
,
"rtol"
:
1e-3
},
}
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
def parse_test_cases():
    """Build flash-attention TestCases for every (shape, dtype) combination.

    Each case passes q/k/v specs plus a per-batch `total_kv_len` tensor and
    the same scalar length (`total_len`) so the torch reference can slice K/V.
    """
    import random

    cases = []
    for q_shape, k_shape, v_shape, attn_mask, dropout_p, is_causal in _TEST_CASES_DATA:
        for dtype in _TENSOR_DTYPES:
            tol = _TOLERANCE_MAP[dtype]
            q_spec = TensorSpec.from_tensor(q_shape, None, dtype)
            k_spec = TensorSpec.from_tensor(k_shape, None, dtype)
            v_spec = TensorSpec.from_tensor(v_shape, None, dtype)
            # One length entry per batch element (q_shape[0] is the batch dim).
            len_shape = (q_shape[0],)
            # Random valid KV length, at most the full KV sequence (k_shape[2]).
            total_len = random.randint(1, k_shape[2])
            # RANDINT with low == high-1 pins every entry to exactly total_len.
            total_kv_len_spec = TensorSpec.from_tensor(
                len_shape,
                None,
                infinicore.int64,
                init_mode=TensorInitializer.RANDINT,
                low=total_len,
                high=total_len + 1,
            )
            kwargs = {
                "attn_mask": attn_mask,
                "dropout_p": dropout_p,
                "is_causal": is_causal,
            }
            # remove None keys
            kwargs = {k: v for k, v in kwargs.items() if v is not None}
            cases.append(
                TestCase(
                    inputs=[q_spec, k_spec, v_spec, total_kv_len_spec, total_len],
                    kwargs=kwargs,
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tol,
                    description="Flash Attention",
                )
            )
    return cases
def torch_flash_attn(q, k, v, total_kv_len, cheat, **kwargs):
    """Torch reference: slice K/V to the first `cheat` positions, then run SDPA.

    `total_kv_len` is accepted only for signature parity with the infinicore
    path and is not used here; `cheat` is the scalar valid KV length.
    """
    sdpa = torch.nn.functional.scaled_dot_product_attention
    return sdpa(q, k[:, :, :cheat, :], v[:, :, :cheat, :], **kwargs)
def infini_flash_attn(q, k, v, total_kv_len, cheat, **kwargs):
    """InfiniCore flash-attention path.

    Forwards q/k/v and the per-batch `total_kv_len` tensor to the device
    kernel. `cheat` (the scalar length used by the torch reference) is
    accepted for signature parity but deliberately not forwarded.
    """
    return infinicore.nn.functional.flash_attention(q, k, v, total_kv_len, **kwargs)
class OpTest(BaseOperatorTest):
    """ScaledDotProductAttention operator test with simplified implementation"""

    def __init__(self):
        super().__init__("ScaledDotProductAttention")

    def get_test_cases(self):
        # Cases embed fresh random KV lengths on every call.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        # args = (q, k, v, total_kv_len, cheat); see torch_flash_attn.
        return torch_flash_attn(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        # Same positional layout as torch_operator; see infini_flash_attn.
        return infini_flash_attn(*args, **kwargs)
def main():
    """Entry point: run the flash-attention suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/kv_caching.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TensorInitializer
,
TestCase
,
GenericTestRunner
,
is_broadcast
,
)
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (shape (bs, nkvh, seq_len, hd), strides)
_TEST_CASES_DATA
=
[
((
1
,
1
,
8
,
1
),
None
),
((
1
,
8
,
32
,
32
),
None
),
((
8
,
8
,
64
,
32
),
None
),
((
1
,
32
,
8
,
64
),
(
32768
,
1024
,
64
,
1
)),
((
4
,
8
,
32
,
16
),
(
65536
,
8192
,
256
,
16
)),
((
8
,
16
,
64
,
128
),
(
8388608
,
524288
,
8192
,
1
)),
]
# Tolerance configuration
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
0
,
"rtol"
:
0
},
infinicore
.
bfloat16
:
{
"atol"
:
0
,
"rtol"
:
0
},
infinicore
.
float32
:
{
"atol"
:
0
,
"rtol"
:
0
},
}
# Data types to test
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
def parse_test_cases():
    """Build KV-caching TestCases for every (layout, dtype) combination.

    For each configured cache shape a random number of new tokens and a
    random existing history length are drawn once, then one TestCase per
    dtype is emitted. The two cache specs appear twice in `inputs` (K and V
    caches) and are the comparison targets (indices 0 and 1).
    """
    # Hoisted out of the loop: the original re-executed `import random`
    # on every iteration.
    import random

    test_cases = []
    for data in _TEST_CASES_DATA:
        cache_shape = data[0]
        # New tokens: random length along the sequence axis, same batch/heads/dim.
        kv_shape = (
            cache_shape[0],
            cache_shape[1],
            random.randint(1, cache_shape[2]),
            cache_shape[3],
        )
        past_shape = (cache_shape[0],)
        strides = data[1]
        # History length leaves room for the new tokens in the cache.
        past_length = random.randint(0, cache_shape[2] - kv_shape[2])
        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 0})
            cache_spec = TensorSpec.from_tensor(cache_shape, strides, dtype)
            kv_spec = TensorSpec.from_tensor(kv_shape, None, dtype)
            # RANDINT with low == high-1 pins every entry to past_length.
            past_kv_lengths_spec = TensorSpec.from_tensor(
                past_shape,
                None,
                infinicore.int64,
                init_mode=TensorInitializer.RANDINT,
                low=past_length,
                high=past_length + 1,
            )
            test_cases.append(
                TestCase(
                    inputs=[
                        cache_spec,
                        cache_spec,
                        kv_spec,
                        kv_spec,
                        past_kv_lengths_spec,
                    ],
                    kwargs={},
                    output_spec=None,
                    comparison_target=[0, 1],
                    tolerance=tolerance,
                    description="KV Caching",
                )
            )
    return test_cases
def torch_kv_caching(k_cache, v_cache, k, v, past_kv_lengths):
    """Reference KV-cache update: append new K/V after each batch's history.

    Args:
        k_cache, v_cache: (batch, num_kv_heads, max_seq, head_dim) caches,
            mutated in place.
        k, v: (batch, num_kv_heads, seq_len, head_dim) new tokens.
        past_kv_lengths: (batch,) int tensor of existing history lengths.

    Returns:
        The mutated (k_cache, v_cache) pair, for runner comparison.
    """
    batch_size = k_cache.shape[0]
    seq_len = k.shape[2]
    for b in range(batch_size):
        past_len = past_kv_lengths[b].item()
        # Write all heads in one slice assignment; the original looped per
        # head, doing the same copy num_kv_heads times slower.
        k_cache[b, :, past_len : past_len + seq_len, :] = k[b]
        v_cache[b, :, past_len : past_len + seq_len, :] = v[b]
    return k_cache, v_cache
def infinicore_kv_caching(k_cache, v_cache, k, v, past_kv_lengths):
    """InfiniCore KV-cache update: the kernel writes into the caches in place.

    Returns the mutated (k_cache, v_cache) so the runner can compare them
    against the torch reference (comparison_target=[0, 1]).
    """
    infinicore.kv_caching(k_cache, v_cache, k, v, past_kv_lengths)
    return k_cache, v_cache
class OpTest(BaseOperatorTest):
    """KV-caching operator test: torch reference vs infinicore kernel."""

    def __init__(self):
        super().__init__("KV Caching")

    def get_test_cases(self):
        # Cases embed fresh random lengths on every call.
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        # args = (k_cache, v_cache, k, v, past_kv_lengths).
        return torch_kv_caching(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        # Same positional layout as torch_operator.
        return infinicore_kv_caching(*args, **kwargs)
def main():
    """Entry point: run the KV-caching suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/paged_attention.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
is_broadcast
,
TensorInitializer
,
)
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format:
_TEST_CASES_DATA
=
[
# (num_seqs, num_heads, num_kv_heads, head_size, block_size, max_seq_len, use_alibi)
(
1
,
1
,
1
,
128
,
16
,
15
,
False
),
(
4
,
40
,
40
,
128
,
16
,
1024
,
False
),
(
6
,
40
,
40
,
128
,
16
,
1024
,
False
),
(
3
,
8
,
8
,
128
,
16
,
1024
,
False
),
(
3
,
8
,
8
,
64
,
16
,
1024
,
False
),
(
8
,
64
,
8
,
128
,
16
,
2048
,
False
),
]
# Tolerance configuration
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
0
,
"rtol"
:
1e-2
},
infinicore
.
float32
:
{
"atol"
:
1e-4
,
"rtol"
:
1e-3
},
infinicore
.
bfloat16
:
{
"atol"
:
0
,
"rtol"
:
5e-2
},
}
# Data types to test
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
]
# ==============================================================================
# Reference Implementation
# ==============================================================================
def parse_test_cases():
    """
    Parse test case data and return list of TestCase objects for paged_attention operation.
    Each test case contains all necessary information for execution and validation.
    """
    test_cases = []
    for (
        num_seqs,
        num_heads,
        num_kv_heads,
        head_size,
        block_size,
        max_seq_len,
        use_alibi,
    ) in _TEST_CASES_DATA:
        scale = 1.0 / (head_size**0.5)
        max_blocks_per_seq = (max_seq_len + block_size - 1) // block_size
        num_blocks = num_seqs * max_blocks_per_seq  # A reasonable number for testing
        # Random valid cache length per sequence (shared across dtypes).
        cache_lens_torch = torch.randint(1, max_seq_len, (num_seqs,), dtype=torch.int64)
        # Each sequence gets a disjoint, consecutive run of block indices.
        block_tables = torch.arange(
            0, num_seqs * max_blocks_per_seq, dtype=torch.int64
        ).view(num_seqs, max_blocks_per_seq)
        q_shape = (num_seqs, num_heads, head_size)
        k_cache_shape = (num_blocks, num_kv_heads, block_size, head_size)
        v_cache_shape = (num_blocks, num_kv_heads, block_size, head_size)
        block_tables_shape = block_tables.shape
        cache_lens_shape = cache_lens_torch.shape
        # Generate test cases for all data types
        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})
            # Create typed tensor specs
            q_spec = TensorSpec.from_tensor(q_shape, None, dtype)
            k_cache_spec = TensorSpec.from_tensor(k_cache_shape, None, dtype)
            v_cache_spec = TensorSpec.from_tensor(v_cache_shape, None, dtype)
            # MANUAL init: the exact torch tensors above are used as inputs.
            block_tables_spec = TensorSpec.from_tensor(
                block_tables_shape,
                init_mode=TensorInitializer.MANUAL,
                set_tensor=block_tables,
                dtype=infinicore.int64,
            )
            cache_lens_spec = TensorSpec.from_tensor(
                cache_lens_shape,
                init_mode=TensorInitializer.MANUAL,
                set_tensor=cache_lens_torch,
                dtype=infinicore.int64,
            )
            # Paged attention operation: returns output tensor.
            # NOTE(review): out_spec is built but not referenced below
            # (output_spec=None); left in place pending cleanup.
            out_shape = (num_seqs, num_heads, head_size)
            out_spec = TensorSpec.from_tensor(out_shape, None, dtype)
            test_cases.append(
                TestCase(
                    inputs=[
                        q_spec,
                        k_cache_spec,
                        v_cache_spec,
                        block_tables_spec,
                        cache_lens_spec,
                    ],
                    kwargs={"alibi_slopes": None, "scale": scale},
                    output_spec=None,
                    comparison_target=0,
                    tolerance=tolerance,
                    description=f"PagedAttention",
                )
            )
    return test_cases
def ref_masked_attention(query, key, value, scale, attn_mask=None):
    """Naive masked-attention reference for one sequence.

    query: (q_len, heads, head_dim); key/value: (kv_len, heads, head_dim).
    Logits are accumulated in float32 (with the optional additive mask),
    softmax-ed over the key axis, cast back to value.dtype, then combined
    with value. Returns (q_len, heads, head_dim).
    """
    logits = torch.einsum("qhd,khd->hqk", query, key).float() * scale
    if attn_mask is not None:
        logits = logits + attn_mask.float()
    probs = torch.nn.functional.softmax(logits, dim=-1).to(value.dtype)
    return torch.einsum("hqk,khd->qhd", probs, value)
def ref_single_query_cached_kv_attention(
    query, key_cache, value_cache, block_tables, cache_lens, alibi_slopes, scale
):
    """Reference paged attention: one query token per sequence against a
    block-paged KV cache.

    query: (num_seqs, num_query_heads, head_size).
    key_cache/value_cache: (num_blocks, num_kv_heads, block_size, head_size).
    block_tables: (num_seqs, max_blocks_per_seq) logical->physical block map.
    cache_lens: (num_seqs,) valid KV length per sequence.
    """
    output = torch.empty_like(query)
    num_query_heads, num_kv_heads = query.shape[1], value_cache.shape[1]
    # GQA factor: how many query heads share one KV head.
    num_queries_per_kv = num_query_heads // num_kv_heads
    head_size, block_size = value_cache.shape[3], value_cache.shape[2]
    num_seqs = query.shape[0]
    for i in range(num_seqs):
        q = query[i].unsqueeze(0)  # (1, num_query_heads, head_size)
        seq_len = cache_lens[i].item()
        block_table = block_tables[i]
        # Gather the first seq_len tokens out of the paged cache.
        keys_lst, values_lst = [], []
        for j in range(seq_len):
            block_num = block_table[j // block_size].item()
            block_off = j % block_size
            k = key_cache[block_num, :, block_off, :]
            v = value_cache[block_num, :, block_off, :]
            keys_lst.append(k)
            values_lst.append(v)
        keys = torch.stack(keys_lst, dim=0)      # (seq_len, num_kv_heads, head_size)
        values = torch.stack(values_lst, dim=0)
        # Expand KV heads so each query head sees its own copy (GQA).
        if num_queries_per_kv > 1:
            keys = torch.repeat_interleave(keys, num_queries_per_kv, dim=1)
            values = torch.repeat_interleave(values, num_queries_per_kv, dim=1)
        alibi_bias = None
        if alibi_slopes is not None:
            # Linear position bias: 0 at the last token, negative before it.
            pos = torch.arange(seq_len, device=query.device).int()
            alibi_bias = (pos - seq_len + 1).float()
            alibi_bias = alibi_slopes.view(-1, 1, 1) * alibi_bias.view(1, 1, -1)
        out = ref_masked_attention(q, keys, values, scale, alibi_bias)
        output[i] = out.view(num_query_heads, head_size)
    return output
class OpTest(BaseOperatorTest):
    """PagedAttention operator test with simplified implementation"""

    def __init__(self):
        super().__init__("PagedAttention")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """PyTorch paged-attention reference (ref_single_query_cached_kv_attention)."""
        return ref_single_query_cached_kv_attention(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """InfiniCore paged_attention implementation"""
        out = infinicore.paged_attention(*args, **kwargs)
        # Ensure the kernel has finished before the runner reads the output.
        infinicore.sync_stream()
        return out
def main():
    """Entry point: run the PagedAttention suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/paged_attention_prefill.py
0 → 100644
View file @
8d09630a
import
os
import
sys
import
torch
import
infinicore
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorInitializer
,
TensorSpec
,
TestCase
,
)
# Test Cases: (num_seqs, num_heads, num_kv_heads, head_size, block_size, max_step_len, num_rounds)
_TEST_CASES_DATA
=
[
(
1
,
1
,
1
,
128
,
8
,
16
,
1
),
(
1
,
4
,
4
,
128
,
8
,
16
,
4
),
(
2
,
8
,
8
,
128
,
16
,
32
,
2
),
(
4
,
16
,
16
,
128
,
8
,
64
,
3
),
(
8
,
64
,
64
,
128
,
8
,
16
,
5
),
]
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
1e-2
,
"rtol"
:
1e-2
},
infinicore
.
float32
:
{
"atol"
:
1e-4
,
"rtol"
:
1e-4
},
# float32 调优容限
infinicore
.
bfloat16
:
{
"atol"
:
2e-2
,
"rtol"
:
2e-2
},
}
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
]
class SimpleCacheManager:
    """Minimal paged-KV block allocator for multi-round prefill tests.

    Blocks are handed out in ascending index order from a free list; each
    request keeps its block table and running token count across rounds.
    """

    def __init__(self, num_blocks, block_size):
        self.num_blocks = num_blocks
        self.block_size = block_size
        self.free_blocks = list(range(num_blocks))
        self.request_to_blocks = {}
        self.request_to_len = {}

    def allocate_slots(self, request_id, num_new_tokens):
        """Grow `request_id`'s allocation by `num_new_tokens` tokens.

        Returns (block_table, new_total_len). Raises IndexError if the
        free-block pool is exhausted.
        """
        blocks = self.request_to_blocks.setdefault(request_id, [])
        current_len = self.request_to_len.get(request_id, 0)
        new_total_len = current_len + num_new_tokens
        target_blocks = -(-new_total_len // self.block_size)  # ceil division
        while len(blocks) < target_blocks:
            blocks.append(self.free_blocks.pop(0))
        self.request_to_len[request_id] = new_total_len
        return blocks, new_total_len
def parse_test_cases():
    """Build multi-round paged-prefill TestCases.

    For each configuration, simulates `num_rounds` prefill rounds: every
    round appends a random number of new tokens per sequence, allocates
    blocks via SimpleCacheManager, writes random K/V into the persistent
    paged caches at the allocated slots, and emits one TestCase per dtype
    snapshotting (query, caches, tables, lengths) for that round.
    """
    test_cases = []
    for (
        num_seqs,
        num_heads,
        num_kv_heads,
        head_size,
        block_size,
        max_step_len,
        num_rounds,
    ) in _TEST_CASES_DATA:
        scale = head_size**-0.5
        num_blocks = 8192  # large enough pool for every configured round count
        manager = SimpleCacheManager(num_blocks, block_size)
        # Running total KV length per sequence, accumulated across rounds.
        kv_lens = torch.zeros(num_seqs, dtype=torch.int64)
        # Persistent paged caches shared by all rounds of this configuration.
        persistent_k = torch.zeros((num_blocks, num_kv_heads, block_size, head_size))
        persistent_v = torch.zeros((num_blocks, num_kv_heads, block_size, head_size))
        for r in range(num_rounds):
            # New query tokens this round, per sequence.
            q_lens = torch.randint(
                1, max_step_len + 1, (num_seqs,), dtype=torch.int64
            )
            kv_lens = kv_lens + q_lens
            total_q_tokens = q_lens.sum().item()
            # Exclusive prefix sums: cum_seqlens_q[i]..cum_seqlens_q[i+1]
            # delimits sequence i's rows in the packed query tensor.
            cum_seqlens_q = torch.zeros(num_seqs + 1, dtype=torch.int64)
            cum_seqlens_q[1:] = torch.cumsum(q_lens, dim=0)
            query_base = torch.randn((total_q_tokens, num_heads, head_size))
            round_block_tables_list = []
            for i in range(num_seqs):
                p_blocks, total_len = manager.allocate_slots(i, q_lens[i].item())
                round_block_tables_list.append(p_blocks)
                # History length = total so far minus this round's new tokens.
                h_len = kv_lens[i].item() - q_lens[i].item()
                # Fill the newly allocated logical positions with random K/V.
                for t in range(q_lens[i].item()):
                    logical_pos = h_len + t
                    b_id = p_blocks[logical_pos // block_size]
                    off = logical_pos % block_size
                    persistent_k[b_id, :, off, :] = torch.randn(
                        num_kv_heads, head_size
                    )
                    persistent_v[b_id, :, off, :] = torch.randn(
                        num_kv_heads, head_size
                    )
            # Pad every table to the same width with block 0 (never read,
            # since kv_lens bounds the walk).
            max_blks = max(len(t) for t in round_block_tables_list)
            padded_tables = torch.tensor(
                [t + [0] * (max_blks - len(t)) for t in round_block_tables_list]
            )
            for dtype in _TENSOR_DTYPES:
                tolerance = _TOLERANCE_MAP.get(dtype)
                # MANUAL init with .clone(): each TestCase owns an immutable
                # snapshot of this round's tensors.
                test_cases.append(
                    TestCase(
                        inputs=[
                            TensorSpec.from_tensor(
                                query_base.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=query_base.clone(),
                                dtype=dtype,
                            ),
                            TensorSpec.from_tensor(
                                persistent_k.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=persistent_k.clone(),
                                dtype=dtype,
                            ),
                            TensorSpec.from_tensor(
                                persistent_v.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=persistent_v.clone(),
                                dtype=dtype,
                            ),
                            TensorSpec.from_tensor(
                                padded_tables.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=padded_tables.clone(),
                                dtype=infinicore.int64,
                            ),
                            TensorSpec.from_tensor(
                                kv_lens.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=kv_lens.clone(),
                                dtype=infinicore.int64,
                            ),
                            TensorSpec.from_tensor(
                                cum_seqlens_q.shape,
                                init_mode=TensorInitializer.MANUAL,
                                set_tensor=cum_seqlens_q.clone(),
                                dtype=infinicore.int64,
                            ),
                        ],
                        kwargs={"scale": scale},
                        tolerance=tolerance,
                        description=f"PagedAttentionPrefill_Round_{r}_{str(dtype).split('.')[-1]}",
                    )
                )
    return test_cases
def ref_paged_attention_multi_turn(
    query, k_cache, v_cache, block_tables, kv_lens, cum_seqlens_q, scale
):
    """Reference causal prefill attention over a block-paged KV cache.

    query: (total_q_tokens, heads, head_dim), sequences packed back-to-back;
    cum_seqlens_q delimits each sequence's rows. kv_lens[i] is sequence i's
    total KV length (history + this round's tokens). Each new query token t
    may attend to all history plus tokens up to and including itself.
    """
    output = torch.zeros_like(query)
    num_seqs = len(kv_lens)
    block_size = k_cache.shape[2]
    for i in range(num_seqs):
        q_start, q_end = cum_seqlens_q[i].item(), cum_seqlens_q[i + 1].item()
        cur_q = query[q_start:q_end]
        q_len = q_end - q_start
        # History length = total KV minus this round's new tokens.
        h_len = kv_lens[i].item() - q_len
        total_len = h_len + q_len
        table = block_tables[i]
        # Gather the sequence's full KV out of the paged cache.
        keys, values = [], []
        for j in range(total_len):
            b_id = table[j // block_size].item()
            off = j % block_size
            keys.append(k_cache[b_id, :, off, :])
            values.append(v_cache[b_id, :, off, :])
        K = torch.stack(keys, dim=0)
        V = torch.stack(values, dim=0)
        # Scores in float32: (heads, q_len, total_len).
        scores = torch.einsum("qhd,khd->hqk", cur_q.float(), K.float()) * scale
        # Causal mask: query token t sees history plus positions <= h_len + t.
        mask = torch.full((q_len, total_len), float("-inf"), device=query.device)
        for t in range(q_len):
            mask[t, : h_len + t + 1] = 0.0
        attn = torch.softmax(scores + mask.unsqueeze(0), dim=-1).to(query.dtype)
        output[q_start:q_end] = torch.einsum("hqk,khd->qhd", attn, V)
    return output
class OpTest(BaseOperatorTest):
    """PagedAttentionPrefill operator test: torch reference vs infinicore kernel."""

    def __init__(self):
        super().__init__("PagedAttentionPrefill")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        kv_lens,
        cum_seqlens_q,
        scale=1.0,
    ):
        # Reference path; see ref_paged_attention_multi_turn for the contract.
        return ref_paged_attention_multi_turn(
            query, k_cache, v_cache, block_tables, kv_lens, cum_seqlens_q, scale
        )

    def infinicore_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        kv_lens,
        cum_seqlens_q,
        scale=1.0,
    ):
        out = infinicore.paged_attention_prefill(
            query,
            k_cache,
            v_cache,
            block_tables,
            kv_lens,
            cum_seqlens_q,
            alibi_slopes=None,
            scale=scale,
        )
        # Ensure the kernel has finished before the runner reads the output.
        infinicore.sync_stream()
        return out
def main():
    """Entry point: run the prefill suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/paged_caching.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
is_broadcast
,
TensorInitializer
,
)
# ==============================================================================
# Operator-specific configuration
# ==============================================================================
# Test cases format: (num_seqs, max_seq_len, num_kv_heads, head_size, block_size)
_TEST_CASES_DATA
=
[
(
1
,
128
,
8
,
128
,
16
),
(
5
,
512
,
40
,
128
,
16
),
(
16
,
1024
,
8
,
64
,
32
),
(
10
,
1024
,
40
,
64
,
32
),
]
# Tolerance configuration
_TOLERANCE_MAP
=
{
infinicore
.
float16
:
{
"atol"
:
0
,
"rtol"
:
1e-5
},
infinicore
.
float32
:
{
"atol"
:
0
,
"rtol"
:
1e-5
},
infinicore
.
bfloat16
:
{
"atol"
:
0
,
"rtol"
:
1e-5
},
}
# Data types to test
_TENSOR_DTYPES
=
[
infinicore
.
float16
,
infinicore
.
bfloat16
,
infinicore
.
float32
]
# ==============================================================================
# Reference Implementation
# ==============================================================================
def ref_paged_caching(key_cache_pool, value_cache_pool, key, value, slot_mapping):
    """
    Reference implementation for paged_caching operator.

    Args:
        key_cache_pool (torch.Tensor): K cache pool, shape [num_blocks, nkvh, block_size, dh]
        value_cache_pool (torch.Tensor): V cache pool, shape [num_blocks, nkvh, block_size, dh]
        key (torch.Tensor): Keys, shape [ntok, nkvh, dh]
        value (torch.Tensor): Values, shape [ntok, nkvh, dh]
        slot_mapping (torch.Tensor): Slot mapping, shape [ntok]; slot s maps to
            block s // block_size, offset s % block_size.

    Returns:
        The (key_cache_pool, value_cache_pool) pair, mutated in place.
    """
    ntok = key.shape[0]
    block_size = key_cache_pool.shape[2]
    # NOTE: this writes directly into the provided pools (no clone) — the
    # original comment here claimed a cloned cache, which was incorrect.
    # The runner compares the mutated pools themselves.
    k_cache_ref = key_cache_pool
    v_cache_ref = value_cache_pool
    for i in range(ntok):
        slot = slot_mapping[i].item()
        block_idx = slot // block_size
        block_offset = slot % block_size
        # Scatter token i's K/V (all heads at once) into its slot.
        k_cache_ref[block_idx, :, block_offset, :] = key[i]
        v_cache_ref[block_idx, :, block_offset, :] = value[i]
    return k_cache_ref, v_cache_ref
def parse_test_cases():
    """
    Parse test case data and return a list of TestCase objects for the
    paged_caching operation.

    For each configured batch layout, random per-sequence context lengths are
    drawn, a contiguous slot mapping is built (simulating the scheduler), and
    one TestCase per dtype is emitted.

    Returns:
        list[TestCase]: fully-specified cases for execution and validation.

    Raises:
        ValueError: if a layout requires more slots than the cache pool holds.
    """
    test_cases = []
    for num_seqs, max_seq_len, num_kv_heads, head_size, block_size in _TEST_CASES_DATA:
        num_blocks = 4096  # A reasonably large cache pool for testing

        # Create metadata: variable context lengths for each sequence in the batch
        context_lens_torch = torch.randint(
            1, max_seq_len + 1, (num_seqs,), dtype=torch.int64
        )
        ntok = torch.sum(context_lens_torch).item()

        # Simulate the scheduler's behavior to create the slot_mapping:
        # each sequence occupies a contiguous run of slots.
        slot_mapping_list = []
        current_slot = 0
        for length in context_lens_torch:
            start_slot = current_slot
            slot_mapping_list.extend(range(start_slot, start_slot + length.item()))
            current_slot += length.item()

        # Ensure we don't exceed the total number of slots in the cache.
        # Use a real exception rather than `assert`, which is stripped under -O.
        if current_slot > num_blocks * block_size:
            raise ValueError("Not enough blocks in the cache pool for this test case")

        slot_mapping = torch.tensor(slot_mapping_list, dtype=torch.int64)
        slot_mapping_shape = slot_mapping.shape

        k_shape = (ntok, num_kv_heads, head_size)
        v_shape = (ntok, num_kv_heads, head_size)
        k_cache_shape = (num_blocks, num_kv_heads, block_size, head_size)
        v_cache_shape = (num_blocks, num_kv_heads, block_size, head_size)

        # Generate test cases for all data types
        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 0, "rtol": 1e-3})

            # Create typed tensor specs; caches start zeroed so any stale
            # value left after the operator ran is detectable.
            k_spec = TensorSpec.from_tensor(k_shape, None, dtype)
            v_spec = TensorSpec.from_tensor(v_shape, None, dtype)
            k_cache_spec = TensorSpec.from_tensor(
                k_cache_shape, None, dtype, init_mode=TensorInitializer.ZEROS
            )
            v_cache_spec = TensorSpec.from_tensor(
                v_cache_shape, None, dtype, init_mode=TensorInitializer.ZEROS
            )
            slot_mapping_spec = TensorSpec.from_tensor(
                slot_mapping_shape,
                init_mode=TensorInitializer.MANUAL,
                set_tensor=slot_mapping,
                dtype=infinicore.int64,
            )

            # In-place operation: the operator mutates k_cache (inputs index 0)
            # and v_cache (inputs index 1).
            test_cases.append(
                TestCase(
                    inputs=[
                        k_cache_spec,
                        v_cache_spec,
                        k_spec,
                        v_spec,
                        slot_mapping_spec,
                    ],
                    kwargs=None,
                    output_spec=None,
                    comparison_target=0,  # Only compare k_cache
                    tolerance=tolerance,
                    description="PagedCaching",  # plain string: no placeholders
                )
            )
    return test_cases
class OpTest(BaseOperatorTest):
    """PagedCaching operator test: wires the reference and native kernels
    into the generic operator test harness."""

    def __init__(self):
        super().__init__("PagedCaching")

    def get_test_cases(self):
        """Build the full case list from the module-level configuration."""
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        """Reference (PyTorch) paged_caching path."""
        return ref_paged_caching(*args, **kwargs)

    def infinicore_operator(self, *args, **kwargs):
        """Native InfiniCore paged_caching path."""
        return infinicore.paged_caching(*args, **kwargs)
def main():
    """Entry point: run the PagedCaching suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/random_sample.py
View file @
8d09630a
...
...
@@ -222,8 +222,8 @@ class OpTest(BaseOperatorTest):
# Re-run operations with the same logits to get results for comparison
# prepare_pytorch_inputs_and_kwargs will reuse self._current_logits if it exists
from
framework.
base
import
Test
Result
from
framework.utils
import
(
from
framework.
results
import
Case
Result
from
framework.utils
.tensor_utils
import
(
convert_infinicore_to_torch
,
infinicore_tensor_from_torch
,
)
...
...
@@ -268,8 +268,8 @@ class OpTest(BaseOperatorTest):
# Check if indices are equal (standard case)
if
ic_idx
==
ref_idx
:
# Return a successful
Test
Result object
return
Test
Result
(
# Return a successful
Case
Result object
return
Case
Result
(
success
=
True
,
return_code
=
0
,
test_case
=
test_case
,
...
...
@@ -283,8 +283,8 @@ class OpTest(BaseOperatorTest):
logits_ic
=
logits_tensor
[
ic_idx
].
item
()
if
logits_ic
==
logits_ref
:
# Valid: different indices but same logits value
# Return a successful
Test
Result object
return
Test
Result
(
# Return a successful
Case
Result object
return
Case
Result
(
success
=
True
,
return_code
=
0
,
test_case
=
test_case
,
...
...
test/infinicore/ops/silu_and_mul.py
0 → 100644
View file @
8d09630a
import
sys
import
os
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
import
torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
is_broadcast
,
)
# ==============================================================================
# Operator-specific configuration
# ==============================================================================

# Test cases format: (input_shape)
# The operator splits the last dimension: Input (..., 2*d) -> Output (..., d)
# All configured last dimensions are even so the gate/up split is exact.
_TEST_CASES_DATA = [
    (2, 4),
    (1024, 1024),
    (2, 4, 8),
    (1, 22016),
    (2, 4, 256),
    (2, 4, 16, 256),
]

# Tolerance configuration for different precisions
# bfloat16 gets the loosest bounds due to its 8-bit mantissa.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 1e-3, "rtol": 1e-3},
    infinicore.float32: {"atol": 1e-5, "rtol": 1e-5},
    infinicore.bfloat16: {"atol": 5e-3, "rtol": 1e-2},
}

_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16, infinicore.float32]
def parse_test_cases():
    """
    Parse SiLUAndMul test case data.

    Input shape: [..., 2*d], Output shape: [..., d]
    Note: In-place is not supported due to shape mismatch between input and output.

    Returns:
        list[TestCase]: one functional-style and one out-parameter-style case
        per (shape, dtype) combination.

    Raises:
        ValueError: if a configured shape has an odd last dimension, which
        cannot be split into equal gate/up halves (integer floor division
        would otherwise silently produce a wrong output shape).
    """
    test_cases = []
    for input_shape in _TEST_CASES_DATA:
        # SwiGLU splits the last dimension in half; reject odd sizes up front
        # instead of letting `// 2` silently floor them.
        if input_shape[-1] % 2 != 0:
            raise ValueError(
                f"SiLUAndMul requires an even last dimension, got shape {input_shape}"
            )
        # Calculate output shape based on SwiGLU logic
        output_shape = tuple(input_shape[:-1]) + (input_shape[-1] // 2,)

        for dtype in _TENSOR_DTYPES:
            tolerance = _TOLERANCE_MAP.get(dtype, {"atol": 1e-5, "rtol": 1e-4})
            input_spec = TensorSpec.from_tensor(input_shape, None, dtype)
            output_spec = TensorSpec.from_tensor(output_shape, None, dtype)

            # Case 1: Functional style (allocates new memory for output)
            test_cases.append(
                TestCase(
                    inputs=[input_spec],
                    kwargs={},
                    output_spec=None,
                    comparison_target=None,
                    tolerance=tolerance,
                    description=f"SiLUAndMul_Functional_{dtype}",
                )
            )

            # Case 2: Explicit output tensor style (uses pre-allocated buffer)
            test_cases.append(
                TestCase(
                    inputs=[input_spec],
                    kwargs=None,
                    output_spec=output_spec,
                    comparison_target="out",
                    tolerance=tolerance,
                    description=f"SiLUAndMul_OutParam_{dtype}",
                )
            )
    return test_cases
class OpTest(BaseOperatorTest):
    """SiLUAndMul operator test (SwiGLU activation)."""

    def __init__(self):
        super().__init__("SiLUAndMul")

    def get_test_cases(self):
        """Build the full case list from the module-level configuration."""
        return parse_test_cases()

    def torch_operator(self, input, out=None, **kwargs):
        """
        PyTorch SwiGLU reference implementation.

        Formula: SiLU(gate) * up, where the last dimension of ``input`` is
        split evenly into [gate, up]. When ``out`` is given, the result is
        copied into it and ``out`` is returned.
        """
        half = input.shape[-1] // 2
        gate, up = torch.split(input, [half, half], dim=-1)
        activated = torch.nn.functional.silu(gate) * up
        if out is None:
            return activated
        out.copy_(activated)
        return out

    def infinicore_operator(self, input, out=None, **kwargs):
        """InfiniCore SiLUAndMul implementation wrapper"""
        import infinicore.nn.functional as F

        return F.silu_and_mul(input, out=out)
def main():
    """Entry point: run the SiLUAndMul suite and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
test/infinicore/ops/sort.py
View file @
8d09630a
...
...
@@ -7,6 +7,7 @@ import torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
CaseResult
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
...
...
@@ -180,7 +181,7 @@ class OpTest(BaseOperatorTest):
and
isinstance
(
test_case
.
inputs
[
0
],
TensorSpec
)
and
test_case
.
inputs
[
0
].
strides
is
not
None
):
return
Test
Result
(
return
Case
Result
(
success
=
False
,
return_code
=-
2
,
test_case
=
test_case
,
...
...
@@ -193,7 +194,7 @@ class OpTest(BaseOperatorTest):
)
for
spec
in
output_specs
:
if
isinstance
(
spec
,
TensorSpec
)
and
spec
.
strides
is
not
None
:
return
Test
Result
(
return
Case
Result
(
success
=
False
,
return_code
=-
2
,
test_case
=
test_case
,
...
...
test/infinicore/ops/std.py
View file @
8d09630a
...
...
@@ -7,6 +7,7 @@ import torch
import
infinicore
from
framework
import
(
BaseOperatorTest
,
CaseResult
,
TensorSpec
,
TestCase
,
GenericTestRunner
,
...
...
@@ -122,7 +123,7 @@ class OpTest(BaseOperatorTest):
and
isinstance
(
test_case
.
inputs
[
0
],
TensorSpec
)
and
test_case
.
inputs
[
0
].
strides
is
not
None
):
return
Test
Result
(
return
Case
Result
(
success
=
False
,
return_code
=-
2
,
test_case
=
test_case
,
...
...
@@ -135,7 +136,7 @@ class OpTest(BaseOperatorTest):
and
isinstance
(
test_case
.
output_spec
,
TensorSpec
)
and
test_case
.
output_spec
.
strides
is
not
None
):
return
Test
Result
(
return
Case
Result
(
success
=
False
,
return_code
=-
2
,
test_case
=
test_case
,
...
...
test/infinicore/run.py
View file @
8d09630a
import
os
import
sys
import
argparse
import
traceback
import
json
import
os
from
pathlib
import
Path
import
importlib.util
from
framework
import
get_hardware_args_group
,
add_common_test_args
def find_ops_directory(location=None):
    """
    Locate the directory containing operator test scripts.

    Args:
        location: Candidate directory to check (default: the ``ops`` folder
            next to this file).

    Returns:
        Path: resolved path to the ops directory, or None if it does not
        exist or contains no ``*.py`` files.
    """
    candidate = (Path(__file__).parent / "ops") if location is None else location
    resolved = candidate.resolve()
    has_py_files = resolved.exists() and any(resolved.glob("*.py"))
    return resolved if has_py_files else None
def get_available_operators(ops_dir):
    """
    Get list of available operators from an ops directory.

    A file counts as an operator test when it mentions "infinicore" together
    with "BaseOperatorTest" or "GenericTestRunner"; this script itself is
    excluded.

    Args:
        ops_dir: Path to ops directory (may be None).

    Returns:
        Sorted list of operator names (file stems); empty if the directory
        is missing.
    """
    if not ops_dir or not ops_dir.exists():
        return []
    test_files = list(ops_dir.glob("*.py"))
    current_script = Path(__file__).name
    test_files = [f for f in test_files if f.name != current_script]
    operators = []
    for test_file in test_files:
        # Was a bare `except:`, which also swallows KeyboardInterrupt and
        # SystemExit; only I/O and decoding failures are expected here.
        try:
            with open(test_file, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeError):
            continue
        if "infinicore" in content and (
            "BaseOperatorTest" in content or "GenericTestRunner" in content
        ):
            operators.append(test_file.stem)
    return sorted(operators)
def import_operator_test(test_file_path):
    """
    Import an operator test module and return the test class instance.

    Args:
        test_file_path: Path to the test file

    Returns:
        tuple: (success, test_instance_or_error) — on success the second
        element is an instance of the discovered test class, otherwise an
        error message string.
    """
    try:
        # Unique module name so repeated imports don't collide in sys.modules.
        module_name = f"op_test_{test_file_path.stem}"

        spec = importlib.util.spec_from_file_location(module_name, test_file_path)
        if spec is None or spec.loader is None:
            return False, f"Could not load module from {test_file_path}"

        module = importlib.util.module_from_spec(spec)
        # Register before executing so intra-module imports can resolve.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)

        # Locate the test class: the first attribute (in dir() order) whose
        # direct bases mention BaseOperatorTest. String matching keeps this
        # decoupled from the framework's actual import path.
        test_class = next(
            (
                candidate
                for candidate in (getattr(module, name) for name in dir(module))
                if isinstance(candidate, type)
                and hasattr(candidate, "__bases__")
                and any("BaseOperatorTest" in str(base) for base in candidate.__bases__)
            ),
            None,
        )
        if test_class is None:
            return False, f"No test class found in {test_file_path}"

        return True, test_class()
    except Exception as e:
        return False, f"Error importing {test_file_path}: {str(e)}"
def run_all_op_tests(
    ops_dir=None,
    specific_ops=None,
    bench=False,
    bench_mode="both",
    verbose=False,
    debug=False,
):
    """
    Run all operator test scripts in the ops directory using direct import.

    Args:
        ops_dir (str, optional): Path to the ops directory. If None, uses auto-detection.
        specific_ops (list, optional): List of specific operator names to test.
        bench (bool): Whether benchmarking is enabled
        bench_mode (str): Benchmark mode - "host", "device", or "both"
        verbose (bool): Whether verbose mode is enabled (stops on first failure)
        debug (bool): Whether to print a traceback when a test raises unexpectedly

    Returns:
        tuple: (results, cumulative_timing) — results maps test names to
        per-test dicts (success flag, return code, four timing totals,
        captured stdout/stderr); cumulative_timing aggregates benchmark
        timings across passing operators.
        NOTE(review): the early-exit paths below return a bare ``{}`` rather
        than a (dict, timing) tuple, so a caller unpacking two values would
        fail on those paths — confirm against the caller before relying on it.
    """
    # Resolve the ops directory, auto-detecting when not given.
    if ops_dir is None:
        ops_dir = find_ops_directory()
    else:
        ops_dir = Path(ops_dir)
    if not ops_dir or not ops_dir.exists():
        print(f"Error: Ops directory '{ops_dir}' does not exist.")
        return {}
    print(f"Looking for test files in: {ops_dir}")
    # Find all Python test files
    test_files = list(ops_dir.glob("*.py"))
    # Filter out this script itself and non-operator test files
    current_script = Path(__file__).name
    test_files = [f for f in test_files if f.name != current_script]
    # Filter to include only files that look like operator tests
    operator_test_files = []
    for test_file in test_files:
        try:
            with open(test_file, "r", encoding="utf-8") as f:
                content = f.read()
            # Look for characteristic patterns of operator tests
            if "infinicore" in content and (
                "BaseOperatorTest" in content or "GenericTestRunner" in content
            ):
                operator_test_files.append(test_file)
        except Exception as e:
            # Unreadable/undecodable files are simply not operator tests.
            continue
    # Filter for specific operators if requested (exact, case-insensitive match)
    if specific_ops:
        filtered_files = []
        for test_file in operator_test_files:
            test_name = test_file.stem.lower()
            if any(op.lower() == test_name for op in specific_ops):
                filtered_files.append(test_file)
        operator_test_files = filtered_files
    if not operator_test_files:
        print(f"No operator test files found in {ops_dir}")
        print(f"Available Python files: {[f.name for f in test_files]}")
        return {}
    print(f"Found {len(operator_test_files)} operator test files:")
    for test_file in operator_test_files:
        print(f" - {test_file.name}")
    results = {}
    # Aggregated benchmark timings across all successfully benched operators.
    cumulative_timing = {
        "total_torch_host_time": 0.0,
        "total_torch_device_time": 0.0,
        "total_infinicore_host_time": 0.0,
        "total_infinicore_device_time": 0.0,
        "operators_tested": 0,
    }
    for test_file in operator_test_files:
        test_name = test_file.stem
        try:
            # Import and run the test directly
            success, test_instance_or_error = import_operator_test(test_file)
            if not success:
                print(f"💥 {test_name}: ERROR - {test_instance_or_error}")
                results[test_name] = {
                    "success": False,
                    "return_code": -1,
                    "torch_host_time": 0.0,
                    "torch_device_time": 0.0,
                    "infini_host_time": 0.0,
                    "infini_device_time": 0.0,
                    "error_message": test_instance_or_error,
                    "test_runner": None,
                    "stdout": "",
                    "stderr": test_instance_or_error,
                }
                continue
            # Get the test runner class from the module just imported
            # (import_operator_test registered it under this name).
            test_module = sys.modules[f"op_test_{test_file.stem}"]
            if not hasattr(test_module, "GenericTestRunner"):
                print(f"💥 {test_name}: ERROR - No GenericTestRunner found")
                results[test_name] = {
                    "success": False,
                    "return_code": -1,
                    "torch_host_time": 0.0,
                    "torch_device_time": 0.0,
                    "infini_host_time": 0.0,
                    "infini_device_time": 0.0,
                    "error_message": "No GenericTestRunner found",
                    "test_runner": None,
                    "stdout": "",
                    "stderr": "No GenericTestRunner found",
                }
                continue
            # Create and run the test runner
            test_runner_class = test_module.GenericTestRunner
            runner_instance = test_runner_class(test_instance_or_error.__class__)
            # Temporarily redirect stdout/stderr to capture the test's output
            from io import StringIO

            stdout_capture = StringIO()
            stderr_capture = StringIO()
            old_stdout = sys.stdout
            old_stderr = sys.stderr
            sys.stdout = stdout_capture
            sys.stderr = stderr_capture
            try:
                # Run the test
                test_success, test_runner = runner_instance.run()
                # Get captured output
                stdout_output = stdout_capture.getvalue()
                stderr_output = stderr_capture.getvalue()
                # Restore stdout/stderr before printing anything ourselves
                sys.stdout = old_stdout
                sys.stderr = old_stderr
                # Replay the captured output
                if stdout_output:
                    print(stdout_output.rstrip())
                if stderr_output:
                    print("\nSTDERR:")
                    print(stderr_output.rstrip())
                # Analyze per-case results reported by the runner
                test_results = test_runner.get_test_results() if test_runner else []
                # Determine overall test status. Per-case return codes:
                # 0 passed, -1 failed, -2 skipped, -3 partial.
                if test_success:
                    return_code = 0
                    status_icon = "✅"
                    status_text = "PASSED"
                else:
                    # Failure dominates partial, which dominates skipped.
                    has_failures = any(
                        result.return_code == -1 for result in test_results
                    )
                    has_partial = any(
                        result.return_code == -3 for result in test_results
                    )
                    has_skipped = any(
                        result.return_code == -2 for result in test_results
                    )
                    if has_failures:
                        return_code = -1
                        status_icon = "❌"
                        status_text = "FAILED"
                    elif has_partial:
                        return_code = -3
                        status_icon = "⚠️"
                        status_text = "PARTIAL"
                    elif has_skipped:
                        return_code = -2
                        status_icon = "⏭️"
                        status_text = "SKIPPED"
                    else:
                        return_code = -1
                        status_icon = "❌"
                        status_text = "FAILED"
                # Calculate timing for all four metrics
                torch_host_time = sum(
                    result.torch_host_time for result in test_results
                )
                torch_device_time = sum(
                    result.torch_device_time for result in test_results
                )
                infini_host_time = sum(
                    result.infini_host_time for result in test_results
                )
                infini_device_time = sum(
                    result.infini_device_time for result in test_results
                )
                results[test_name] = {
                    "success": test_success,
                    "return_code": return_code,
                    "torch_host_time": torch_host_time,
                    "torch_device_time": torch_device_time,
                    "infini_host_time": infini_host_time,
                    "infini_device_time": infini_device_time,
                    "error_message": "",
                    "test_runner": test_runner,
                    "stdout": stdout_output,
                    "stderr": stderr_output,
                }
                print(
                    f"{status_icon} {test_name}: {status_text} (return code: {return_code})"
                )
                # Accumulate benchmark timing only for fully passing operators
                if bench and test_success and return_code == 0:
                    cumulative_timing["total_torch_host_time"] += torch_host_time
                    cumulative_timing["total_torch_device_time"] += torch_device_time
                    cumulative_timing["total_infinicore_host_time"] += infini_host_time
                    cumulative_timing["total_infinicore_device_time"] += (
                        infini_device_time
                    )
                    cumulative_timing["operators_tested"] += 1
            except Exception as e:
                # Restore stdout/stderr before re-raising so the outer
                # handler's prints reach the real console.
                sys.stdout = old_stdout
                sys.stderr = old_stderr
                raise e
            # In verbose mode, stop execution on first failure
            if verbose and not test_success and return_code != 0:
                break
        except Exception as e:
            print(f"💥 {test_name}: ERROR - {str(e)}")
            results[test_name] = {
                "success": False,
                "return_code": -1,
                "torch_host_time": 0.0,
                "torch_device_time": 0.0,
                "infini_host_time": 0.0,
                "infini_device_time": 0.0,
                "error_message": str(e),
                "test_runner": None,
                "stdout": "",
                "stderr": str(e),
            }
            # In verbose mode, stop execution on any exception
            if verbose:
                print(f"\n{'!' * 60}")
                print(f"VERBOSE MODE: Stopping execution due to exception in {test_name}")
                print(f"{'!' * 60}")
                break
            if debug:
                traceback.print_exc()
                break
    return results, cumulative_timing
def print_summary(
    results,
    verbose=False,
    total_expected_tests=0,
    cumulative_timing=None,
    bench_mode="both",
):
    """Print a comprehensive summary of test results including benchmark data.

    Args:
        results: mapping of test name -> per-test result dict as produced by
            run_all_op_tests (must contain a "return_code" key).
        verbose: when True and fewer tests ran than expected, note that
            execution stopped early.
        total_expected_tests: expected number of tests (0 disables the check).
        cumulative_timing: aggregated benchmark timings dict, or None.
        bench_mode: which timing columns to show - "host", "device", or "both".

    Returns:
        bool: True when no test failed (skipped/partial still count as OK),
        False otherwise (or when nothing ran).
    """
    print(f"\n{'=' * 80}")
    print("CUMULATIVE TEST SUMMARY")
    print(f"{'=' * 80}")
    if not results:
        print("No tests were run.")
        return False
    # Count different types of results.
    # Return-code convention: 0 passed, -2 skipped, -3 partial, else failed.
    passed = 0
    failed = 0
    skipped = 0
    partial = 0
    passed_operators = []  # Store passed operator names
    failed_operators = []  # Store failed operator names
    skipped_operators = []  # Store skipped operator names
    partial_operators = []  # Store partial operator names
    for test_name, result_data in results.items():
        return_code = result_data["return_code"]
        if return_code == 0:
            passed += 1
            passed_operators.append(test_name)
        elif return_code == -2:
            # Special code for skipped tests
            skipped += 1
            skipped_operators.append(test_name)
        elif return_code == -3:
            # Special code for partial tests
            partial += 1
            partial_operators.append(test_name)
        else:
            failed += 1
            failed_operators.append(test_name)
    total = len(results)
    print(f"Total tests run: {total}")
    if total_expected_tests > 0 and total < total_expected_tests:
        print(f"Total tests expected: {total_expected_tests}")
        print(f"Tests not executed: {total_expected_tests - total}")
    print(f"Passed: {passed}")
    print(f"Failed: {failed}")
    if skipped > 0:
        print(f"Skipped: {skipped}")
    if partial > 0:
        print(f"Partial: {partial}")
    # Print benchmark summary if cumulative_timing data is available
    if cumulative_timing and cumulative_timing["operators_tested"] > 0:
        print(f"{'-' * 40}")
        print("BENCHMARK SUMMARY:")
        print(f" Operators Tested: {cumulative_timing['operators_tested']}")
        # Display timing based on bench_mode
        if bench_mode in ["host", "both"]:
            print(
                f" PyTorch Host Total Time: {cumulative_timing['total_torch_host_time']:12.3f} ms"
            )
            print(
                f" InfiniCore Host Total Time: {cumulative_timing['total_infinicore_host_time']:12.3f} ms"
            )
        if bench_mode in ["device", "both"]:
            print(
                f" PyTorch Device Total Time: {cumulative_timing['total_torch_device_time']:12.3f} ms"
            )
            print(
                f" InfiniCore Device Total Time: {cumulative_timing['total_infinicore_device_time']:12.3f} ms"
            )
        print(f"{'-' * 40}")
    # Display passed operators, 10 names per line
    if passed_operators:
        print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):")
        for i in range(0, len(passed_operators), 10):
            line_ops = passed_operators[i : i + 10]
            print(" " + ", ".join(line_ops))
    else:
        print(f"\n✅ PASSED OPERATORS: None")
    # Display failed operators (if any)
    if failed_operators:
        print(f"\n❌ FAILED OPERATORS ({len(failed_operators)}):")
        for i in range(0, len(failed_operators), 10):
            line_ops = failed_operators[i : i + 10]
            print(" " + ", ".join(line_ops))
    # Display skipped operators (if any)
    if skipped_operators:
        print(f"\n⏭️ SKIPPED OPERATORS ({len(skipped_operators)}):")
        for i in range(0, len(skipped_operators), 10):
            line_ops = skipped_operators[i : i + 10]
            print(" " + ", ".join(line_ops))
    # Display partial operators (if any)
    if partial_operators:
        print(f"\n⚠️ PARTIAL OPERATORS ({len(partial_operators)}):")
        for i in range(0, len(partial_operators), 10):
            line_ops = partial_operators[i : i + 10]
            print(" " + ", ".join(line_ops))
    if total > 0:
        # Calculate success rate based on actual executed tests
        # (skipped tests are excluded from the denominator).
        executed_tests = passed + failed + partial
        if executed_tests > 0:
            success_rate = passed / executed_tests * 100
            print(f"\nSuccess rate: {success_rate:.1f}%")
    if verbose and total < total_expected_tests:
        print(f"\n💡 Verbose mode: Execution stopped after first failure")
        print(f" {total_expected_tests - total} tests were not executed")
    if failed == 0:
        if skipped > 0 or partial > 0:
            print(f"\n⚠️ Tests completed with some operators not implemented")
            print(f" - {skipped} tests skipped (both operators not implemented)")
            print(f" - {partial} tests partial (one operator not implemented)")
        else:
            print(f"\n🎉 All tests passed!")
        return True
    else:
        print(f"\n❌ {failed} tests failed")
        return False
def list_available_tests(ops_dir=None):
    """List all available operator test files."""
    ops_dir = find_ops_directory() if ops_dir is None else Path(ops_dir)
    if not ops_dir or not ops_dir.exists():
        print(f"Error: Ops directory '{ops_dir}' does not exist.")
        return
    operators = get_available_operators(ops_dir)
    if not operators:
        print(f"No operator test files found in {ops_dir}")
        # Show available Python files for debugging
        test_files = list(ops_dir.glob("*.py"))
        current_script = Path(__file__).name
        test_files = [f for f in test_files if f.name != current_script]
        if test_files:
            print(f"Available Python files: {[f.name for f in test_files]}")
        return
    print(f"Available operator test files in {ops_dir}:")
    for operator in operators:
        print(f" - {operator}")
    print(f"\nTotal: {len(operators)} operators")
from
framework
import
(
get_hardware_args_group
,
add_common_test_args
,
InfiniDeviceEnum
,
InfiniDeviceNames
,
)
from
framework.test_manager
import
TestCollector
,
TestManager
def
generate_help_epilog
(
ops_dir
):
def
generate_help_epilog
(
ops_dir
=
None
):
"""
Generate dynamic help epilog with available operators and hardware platforms.
Args:
ops_dir: Path to ops directory
Returns:
str: Formatted help text
Generate dynamic help epilog containing available operators and hardware platforms.
Maintains the original output format for backward compatibility.
"""
# Get available operators
operators
=
get_available_operators
(
ops_dir
)
# === Adapter: Use TestCollector to get operator list ===
# Temporarily instantiate a Collector just to fetch the list
collector
=
TestCollector
(
ops_dir
)
operators
=
collector
.
get_available_operators
()
# Build epilog text
# Build epilog text
(fully replicating original logic)
epilog_parts
=
[]
# Examples section
...
...
@@ -627,18 +89,142 @@ def generate_help_epilog(ops_dir):
return
"
\n
"
.
join
(
epilog_parts
)
def
main
():
"""Main entry point with comprehensive command line argument parsing."""
# First, find ops directory for dynamic help generation
ops_dir
=
find_ops_directory
()
def fill_defaults_for_local_mode(args):
    """
    Fill default arguments for Local Scan mode.

    Parser defaults are deliberately None so that load mode can distinguish
    "user supplied" from "unset"; in local mode we therefore substitute the
    real defaults ourselves.

    Args:
        args: parsed argparse.Namespace (left unmodified).

    Returns:
        argparse.Namespace: a shallow copy with num_prerun defaulted to 10
        and num_iterations defaulted to 1000 when unset.
    """
    # Work on a copy so the caller's namespace stays untouched.
    local_args = argparse.Namespace(**vars(args))
    for attr, fallback in (("num_prerun", 10), ("num_iterations", 1000)):
        if getattr(local_args, attr) is None:
            setattr(local_args, attr, fallback)
    return local_args
def load_and_override_cases(load_paths, args):
    """
    Load JSON test-case definitions, apply CLI overrides, and normalize the
    per-case argument dicts.

    Precedence for every setting is CLI > JSON > built-in default.

    Args:
        load_paths: list of file or directory paths; directories contribute
            every ``*.json`` inside them (non-recursive).
        args: parsed argparse.Namespace. Expected attributes: one boolean per
            device name (lower-cased InfiniDeviceNames values), plus save,
            bench, verbose, debug, eq_nan, num_prerun, num_iterations.

    Returns:
        list[dict]: the loaded cases with ``case["args"]`` fully populated.
    """
    cases = []
    files_to_read = []
    # 1. Scan: expand directories into their *.json files.
    for p_str in load_paths:
        p = Path(p_str)
        if p.is_dir():
            files_to_read.extend(p.glob("*.json"))
        elif p.is_file():
            files_to_read.append(p)
    # 2. Read and Validate
    loaded_count = 0  # NOTE(review): counted but never reported below
    skipped_count = 0
    for f_path in files_to_read:
        try:
            with open(f_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Unify as a list to handle both single dict and list of dicts
            current_batch = data if isinstance(data, list) else [data]
            valid_batch = []
            for item in current_batch:
                # We only require the 'operator' field to identify the test case.
                if isinstance(item, dict) and "operator" in item:
                    valid_batch.append(item)
                else:
                    skipped_count += 1
            if valid_batch:
                cases.extend(valid_batch)
                loaded_count += 1
        except Exception as e:
            # Log warning only; do not crash the program on bad files to
            # ensure flow continuity.
            print(f"❌ Error loading {f_path.name}: {e}")
    if skipped_count > 0:
        print(f"ℹ️ Ignored {skipped_count} items/files (invalid format).")
    # ==================================================
    # Device Logic using InfiniDeviceEnum
    # ==================================================
    # 1. Identify active devices from CLI arguments.
    # Mapping convention: device display name (e.g. "Cambricon") lower-cased
    # is the argparse attribute name (e.g. args.cambricon).
    cli_active_devices = []
    for device_enum, device_name in InfiniDeviceNames.items():
        # device_name is like "CPU", "NVIDIA", "Cambricon"
        # arg_name becomes "cpu", "nvidia", "cambricon"
        arg_name = device_name.lower()
        if getattr(args, arg_name, False):
            cli_active_devices.append(device_name)
    print(f"\n[Config Processing]")
    for case in cases:
        if "args" not in case or case["args"] is None:
            case["args"] = {}
        case_args = case["args"]
        # 2. Apply Device Overrides (CLI > JSON)
        if cli_active_devices:
            case["device"] = ",".join(cli_active_devices)
        final_dev_str = case.get("device", "").upper()  # Uppercase for easier matching
        # 3. Set Boolean flags in case_args based on final device string.
        # Substring match against the upper-cased device list, so both
        # "NVIDIA" and "nvidia" in JSON activate the flag.
        for device_enum, device_name in InfiniDeviceNames.items():
            arg_name = device_name.lower()
            is_active = device_name.upper() in final_dev_str
            case_args[arg_name] = is_active
        case_args["save"] = getattr(args, "save", None)
        # Standard arguments (CLI > JSON > Default)
        case_args["bench"] = (
            args.bench if args.bench is not None else case_args.get("bench")
        )
        # Boolean flags: CLI True forces True, otherwise keep the JSON value.
        case_args["verbose"] = args.verbose or case_args.get("verbose", False)
        case_args["debug"] = args.debug or case_args.get("debug", False)
        case_args["eq_nan"] = args.eq_nan or case_args.get("eq_nan", False)
        # Numeric arguments: CLI value wins; a JSON value of 0/None falls
        # back to the built-in default because of the `or`.
        case_args["num_prerun"] = (
            args.num_prerun
            if args.num_prerun is not None
            else (case_args.get("num_prerun") or 10)
        )
        case_args["num_iterations"] = (
            args.num_iterations
            if args.num_iterations is not None
            else (case_args.get("num_iterations") or 1000)
        )
    print(f"📂 Processed {len(cases)} cases ready for execution.\n")
    return cases
def main():
    """Main entry point for the InfiniCore Operator Test Runner.

    Parses CLI options, then runs in one of two modes:
      * Load mode (``--load``): test cases are read from JSON files and
        executed via ``TestManager``.
      * Local scan mode: operators discovered under the ops directory are
        filtered by ``--ops`` (if given) and executed.

    Exits the process with status 0 on success, 1 on any failure.
    """
    parser = argparse.ArgumentParser(
        description="Run InfiniCore operator tests across multiple hardware platforms",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # FIX: the merged source passed `epilog=` twice (once with ops_dir,
        # once without), which is a SyntaxError (duplicate keyword argument).
        # Keep the zero-argument form used by the incoming branch.
        epilog=generate_help_epilog(),
    )

    # Core options
    parser.add_argument(
        "--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)"
    )
    # NOTE(review): the diff view collapsed a run of add_argument calls here
    # (`@@ -650,119 +236,106 @@`); --ops is reconstructed from its later
    # usage (args.ops is iterated as a list of operator names) — confirm
    # against the full file.
    parser.add_argument(
        "--ops",
        nargs="+",
        help="Specific operators to test (default: all available)",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List all available test files without running them",
    )

    # Call common method to add shared arguments (bench, debug, verbose, save...).
    # FIX: the merged source invoked add_common_test_args(parser) twice, which
    # makes argparse raise on duplicate option strings. Register it once.
    add_common_test_args(parser)

    parser.add_argument(
        "--load",
        nargs="+",
        help="Load test cases from JSON",
    )
    # Default value is None so we can tell whether the user provided input
    # (JSON-supplied values must not be clobbered by parser defaults).
    parser.add_argument("--num_prerun", type=lambda x: max(0, int(x)), default=None)
    parser.add_argument("--num_iterations", type=lambda x: max(0, int(x)), default=None)

    get_hardware_args_group(parser)

    # Parse known args first, leave the rest for the test scripts.
    args, unknown_args = parser.parse_known_args()
    if unknown_args:
        print(f"Passing extra arguments to test scripts: {unknown_args}")

    # 1. Discovery
    collector = TestCollector(args.ops_dir)
    if args.list:
        print("Available operators:", collector.get_available_operators())
        return

    # ==========================================================================
    # Branch 1: Load Mode (JSON Data Driven)
    # ==========================================================================
    if args.load:
        # 1. Load and override arguments (CLI > JSON > defaults).
        json_cases = load_and_override_cases(args.load, args)
        if not json_cases:
            sys.exit(1)

        # 2. Determine global Bench/Verbose status (for Summary display).
        bench = json_cases[0]["args"].get("bench")
        # FIX: the merged source looked up the key "verbose\n" (a stray
        # newline inside the string literal); use the plain key.
        verbose = json_cases[0]["args"].get("verbose")
        if verbose:
            print("Verbose mode: ENABLED (will stop on first error with full traceback)")
        if bench:
            # FIX: `args.bench if args.bench != "both" else "both"` was a
            # no-op conditional (both arms evaluate to args.bench).
            print(f"Benchmark mode: {str(bench).upper()} timing")

        # 3. Initialize and Execute
        test_manager = TestManager(
            ops_dir=args.ops_dir, verbose=verbose, bench_mode=bench
        )
        success, _ = test_manager.test(json_cases_list=json_cases)

    # ==========================================================================
    # Branch 2: Local Scan Mode
    # ==========================================================================
    else:
        if args.bench:
            print(f"Benchmark mode: {args.bench.upper()} timing")
        if args.verbose:
            print("Verbose mode: ENABLED (will stop on first error with full traceback)")

        # 2. Filtering: intersect the requested operators with what exists.
        target_ops = None
        if args.ops:
            available_ops = set(collector.get_available_operators())
            requested_ops = set(args.ops)
            valid_ops = list(requested_ops & available_ops)
            invalid_ops = list(requested_ops - available_ops)
            if invalid_ops:
                print("⚠️ Warning: The following requested operators were not found:")
                print(f"   {', '.join(invalid_ops)}")
                print("   (Use --list to see available operators)")
            if not valid_ops:
                # Case A: user input provided, but ALL entries were invalid —
                # fall back to running everything rather than doing nothing.
                print("⚠️ No valid operators remained from your list.")
                print("🔄 Fallback: Proceeding to run ALL available tests...")
            else:
                # Case B: at least some valid operators found.
                print(f"🎯 Targeted operators: {', '.join(valid_ops)}")
                target_ops = valid_ops

        # 3. Execution Preparation
        # Fill defaults for local mode (since parser defaults are None).
        global_exec_args = fill_defaults_for_local_mode(args)

        # 4. Initialize API & Execute
        test_manager = TestManager(
            ops_dir=args.ops_dir, verbose=args.verbose, bench_mode=args.bench
        )
        success, _ = test_manager.test(
            target_ops=target_ops, global_exec_args=global_exec_args
        )

    sys.exit(0 if success else 1)
if __name__ == "__main__":
    # Script entry point; body was collapsed in the diff view but the
    # standard guard-then-dispatch is the only sensible content here.
    main()
...
test/infinicore/tensor/narrow.py
View file @
8d09630a
...
...
@@ -5,9 +5,15 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
is_broadcast
,
TensorSpec
,
TestCase
)
# ==============================================================================
# Operator-specific configuration
...
...
test/infinicore/tensor/squeeze.py
View file @
8d09630a
...
...
@@ -5,9 +5,15 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorSpec
,
TestCase
,
is_broadcast
)
# ==============================================================================
# Operator-specific configuration
...
...
test/infinicore/tensor/unsqueeze.py
View file @
8d09630a
...
...
@@ -5,9 +5,15 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import
torch
import
infinicore
from
framework.base
import
BaseOperatorTest
,
TensorSpec
,
TestCase
from
framework.runner
import
GenericTestRunner
from
framework.utils
import
is_broadcast
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorSpec
,
TestCase
,
is_broadcast
)
# ==============================================================================
# Operator-specific configuration
...
...
Prev
1
…
14
15
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment