jerrrrry / infinicore · Commits · 0d58c820
".github/git@developer.sourcefind.cn:tsoc/superbenchmark.git" did not exist on "425b9ff8651b683cda504ef610a8577e8ac7332f"
Commit 0d58c820, authored Dec 16, 2025 by baominghelly
Parent: 726eacf8

issue/787 - Split run ops test logic and fix kwargs name in report
Showing 5 changed files, with 384 additions and 765 deletions:
- test/infinicore/framework/datatypes.py (+35, -1)
- test/infinicore/framework/executor.py (+98, -0, new file)
- test/infinicore/framework/loader.py (+55, -0, new file)
- test/infinicore/framework/reporter.py (+144, -16)
- test/infinicore/run.py (+52, -748)
test/infinicore/framework/datatypes.py
```python
import torch
import infinicore
from dataclasses import dataclass, field


def to_torch_dtype(infini_dtype):
    """Convert infinicore data type to PyTorch data type"""
    ...
```

@@ -60,3 +60,37 @@ def to_infinicore_dtype(torch_dtype):

```python
        return infinicore.complex128
    else:
        raise ValueError(f"Unsupported torch dtype: {torch_dtype}")


@dataclass
class TestTiming:
    """Stores performance testing timing metrics."""

    torch_host: float = 0.0
    torch_device: float = 0.0
    infini_host: float = 0.0
    infini_device: float = 0.0
    operators_tested: int = 0


@dataclass
class SingleTestResult:
    """Stores the execution results of a single test file."""

    name: str
    success: bool = False
    return_code: int = -1
    error_message: str = ""
    stdout: str = ""
    stderr: str = ""
    timing: TestTiming = field(default_factory=TestTiming)

    @property
    def status_icon(self):
        if self.return_code == 0:
            return "✅"
        if self.return_code == -2:
            return "⏭️"
        if self.return_code == -3:
            return "⚠️"
        return "❌"

    @property
    def status_text(self):
        if self.return_code == 0:
            return "PASSED"
        if self.return_code == -2:
            return "SKIPPED"
        if self.return_code == -3:
            return "PARTIAL"
        return "FAILED"
```
test/infinicore/framework/executor.py (new file, 0 → 100644)
```python
import sys
import importlib.util
from io import StringIO
from contextlib import contextmanager

from .datatypes import SingleTestResult, TestTiming


@contextmanager
def capture_output():
    """Context manager: captures stdout and stderr."""
    new_out, new_err = StringIO(), StringIO()
    old_out, old_err = sys.stdout, sys.stderr
    try:
        sys.stdout, sys.stderr = new_out, new_err
        yield new_out, new_err
    finally:
        sys.stdout, sys.stderr = old_out, old_err


class SingleTestExecutor:
    def run(self, file_path) -> SingleTestResult:
        result = SingleTestResult(name=file_path.stem)
        try:
            # 1. Dynamically import the module
            module = self._import_module(file_path)

            # 2. Look for TestRunner
            if not hasattr(module, "GenericTestRunner"):
                raise ImportError("No GenericTestRunner found in module")

            # 3. Look for TestClass (subclass of BaseOperatorTest)
            test_class = self._find_test_class(module)
            if not test_class:
                raise ImportError("No BaseOperatorTest subclass found")

            test_instance = test_class()
            runner_class = module.GenericTestRunner
            runner = runner_class(test_instance.__class__)

            # 4. Execute and capture output
            with capture_output() as (out, err):
                success, internal_runner = runner.run()

            # 5. Populate results
            result.success = success
            result.stdout = out.getvalue()
            result.stderr = err.getvalue()

            # Extract detailed results from internal_runner
            test_results = internal_runner.get_test_results() if internal_runner else []
            self._analyze_return_code(result, test_results)
            self._extract_timing(result, test_results)
        except Exception as e:
            result.success = False
            result.error_message = str(e)
            result.stderr += f"\nExecutor Error: {str(e)}"
            result.return_code = -1
        return result

    def _import_module(self, path):
        module_name = f"op_test_{path.stem}"
        spec = importlib.util.spec_from_file_location(module_name, path)
        if not spec or not spec.loader:
            raise ImportError(f"Could not load spec from {path}")
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module
        spec.loader.exec_module(module)
        return module

    def _find_test_class(self, module):
        for attr_name in dir(module):
            attr = getattr(module, attr_name)
            if isinstance(attr, type) and hasattr(attr, "__bases__"):
                # Simple check for base class name
                if any("BaseOperatorTest" in str(b) for b in attr.__bases__):
                    return attr
        return None

    def _analyze_return_code(self, result, test_results):
        # Logic consistent with original code: determine if all passed, partially passed, or skipped
        if not result.success:
            result.return_code = -1
            return
        codes = [r.return_code for r in test_results]
        if -1 in codes:
            result.return_code = -1
        elif -3 in codes:
            result.return_code = -3
        elif -2 in codes:
            result.return_code = -2
        else:
            result.return_code = 0

    def _extract_timing(self, result, test_results):
        # Accumulate timing
        t = result.timing
        t.torch_host = sum(r.torch_host_time for r in test_results)
        t.torch_device = sum(r.torch_device_time for r in test_results)
        t.infini_host = sum(r.infini_host_time for r in test_results)
        t.infini_device = sum(r.infini_device_time for r in test_results)
```
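Taken together, run() yields one SingleTestResult per test file and never raises; all failure modes land on the result object. A minimal driving sketch (the file path and import setup are assumptions for illustration, not part of the commit):

```python
from pathlib import Path

from framework.executor import SingleTestExecutor  # assumes framework/ is on sys.path

executor = SingleTestExecutor()
result = executor.run(Path("ops/add.py"))  # hypothetical operator test file

print(result.status_icon, result.name, result.status_text)
if not result.success:
    print(result.error_message or result.stderr)
# Per-file timing is pre-summed across the file's test cases:
print(f"host ms: torch={result.timing.torch_host:.3f} infini={result.timing.infini_host:.3f}")
```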
test/infinicore/framework/loader.py (new file, 0 → 100644)
```python
from pathlib import Path


class TestDiscoverer:
    def __init__(self, ops_dir_path=None):
        self.ops_dir = self._resolve_dir(ops_dir_path)

    def _resolve_dir(self, path):
        if path:
            p = Path(path)
            if p.exists():
                return p
        # Default fallback logic: 'ops' directory under the parent of the current file's parent.
        # Note: Since this file is in 'framework/', we look at parent.parent.
        # It is recommended to pass an explicit path in run.py.
        fallback = Path(__file__).parent.parent / "ops"
        return fallback if fallback.exists() else None

    def get_available_operators(self):
        """Returns a list of names of all available operators."""
        if not self.ops_dir:
            return []
        files = self.scan()
        return sorted([f.stem for f in files])

    def scan(self, specific_ops=None):
        """Scans and returns a list of Path objects that meet the criteria."""
        if not self.ops_dir or not self.ops_dir.exists():
            return []

        # 1. Find all .py files
        files = list(self.ops_dir.glob("*.py"))

        # 2. Filter out non-test files (via content check)
        valid_files = []
        for f in files:
            if f.name.startswith("_") or f.name == "run.py":
                continue
            if self._is_operator_test(f):
                valid_files.append(f)

        # 3. If specific operators are specified, filter them
        if specific_ops:
            return [f for f in valid_files if f.stem in specific_ops]
        return valid_files

    def _is_operator_test(self, file_path):
        """Checks if the file content contains operator test characteristics."""
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return "infinicore" in content and (
                "BaseOperatorTest" in content or "GenericTestRunner" in content
            )
        except:
            return False
```
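Discovery is content-based rather than name-based: a file counts as an operator test only if it mentions infinicore plus one of the framework classes. A small sketch (the directory and operator names here are hypothetical):

```python
from framework.loader import TestDiscoverer  # assumes framework/ is on sys.path

# Pass the ops directory explicitly, as the _resolve_dir comment recommends.
discoverer = TestDiscoverer("test/infinicore/ops")

print(discoverer.get_available_operators())       # e.g. ['add', 'matmul', ...]
only_add = discoverer.scan(specific_ops=["add"])  # -> [Path('test/infinicore/ops/add.py')]
```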
test/infinicore/framework/reporter.py
```diff
@@ -61,35 +61,61 @@ class TestReporter:
             # --- B. Build Kwargs ---
             display_kwargs = {}

             # B1. Process existing kwargs
             for k, v in tc.kwargs.items():
-                # Handle Inplace: "out": index -> "out": "input_name"
+                # 1. Handle Inplace output index: "out": 0 -> "out": "in_0" / "a_spec"
                 if k == "out" and isinstance(v, int):
                     if 0 <= v < len(tc.inputs):
-                        display_kwargs[k] = tc.inputs[v].name
+                        # Prioritize the input's name; otherwise, default to index-based name
+                        display_kwargs[k] = getattr(tc.inputs[v], "name", None) or f"in_{v}"
                     else:
                         display_kwargs[k] = f"Invalid_Index_{v}"
+                # 2. Handle TensorSpec objects
+                elif isinstance(v, TensorSpec):
+                    spec_dict = TestReporter._spec_to_dict(v)
+                    # If the object has a name, explicitly overwrite it; otherwise, keep original
+                    if getattr(v, "name", None):
+                        spec_dict["name"] = v.name
+                    display_kwargs[k] = spec_dict
+                # 3. Direct assignment for other types
                 else:
-                    display_kwargs[k] = (
-                        TestReporter._spec_to_dict(v) if isinstance(v, TensorSpec) else v
-                    )
+                    display_kwargs[k] = v

-            # B2. Inject Outputs into Kwargs
-            if hasattr(tc, "output_specs") and tc.output_specs:
+            # --- B2. Inject Outputs ---
+            # Handle output list (output_specs)
+            if getattr(tc, "output_specs", None):
                 for i, spec in enumerate(tc.output_specs):
-                    display_kwargs[f"out_{i}"] = TestReporter._spec_to_dict(spec)
-            elif tc.output_spec:
-                if "out" not in display_kwargs:
-                    display_kwargs["out"] = TestReporter._spec_to_dict(tc.output_spec)
+                    out_dict = TestReporter._spec_to_dict(spec)
+                    # Prioritize intrinsic name; otherwise, default to "out_i"
+                    out_dict["name"] = getattr(spec, "name", None) or f"out_{i}"
+                    display_kwargs[f"out_{i}"] = out_dict
+            # Handle single output (output_spec), preventing overwrite of existing "out"
+            elif tc.output_spec and "out" not in display_kwargs:
+                out_dict = TestReporter._spec_to_dict(tc.output_spec)
+                # Prioritize intrinsic name; otherwise, default to "out" (fixes null issue)
+                out_dict["name"] = getattr(tc.output_spec, "name", "out")
+                display_kwargs["out"] = out_dict

+            # --- C. Build Inputs ---
+            # Iterate inputs: prioritize original name, fallback to "in_i"
+            processed_inputs = []
+            for i, inp in enumerate(tc.inputs):
+                inp_dict = TestReporter._spec_to_dict(inp)
+                # Simplified logic: Use "name" attribute if present and non-empty, else use f"in_{i}"
+                inp_dict["name"] = getattr(inp, "name", None) or f"in_{i}"
+                processed_inputs.append(inp_dict)

             # --- C. Build Test Case Dictionary ---
             case_data = {
                 "description": tc.description,
-                "inputs": [TestReporter._spec_to_dict(i) for i in tc.inputs],
+                "inputs": processed_inputs,
                 "kwargs": display_kwargs,
                 "comparison_target": tc.comparison_target,
                 "tolerance": tc.tolerance,
             }

             # --- D. Inject Result ---
             if res:
                 case_data["result"] = TestReporter._fmt_result(res)
@@ -117,7 +143,7 @@ class TestReporter:
         indent_12 = ' ' * 12
         indent_16 = ' ' * 16
         indent_20 = ' ' * 20

         print(f"💾 Saving to: {final_path}")
         try:
             with open(final_path, "w", encoding="utf-8") as f:
@@ -125,8 +151,8 @@ class TestReporter:
                 for i, entry in enumerate(total_results):
                     f.write(f"{indent_4}{{\n")
-                    keys = list(entry.keys())
+                    keys = list(entry.keys())
                     for j, key in enumerate(keys):
                         val = entry[key]
                         comma = "," if j < len(keys) - 1 else ""
@@ -204,7 +230,109 @@ class TestReporter:
             import traceback; traceback.print_exc()
             print(f"  ❌ Save failed: {e}")

+    @staticmethod
+    def print_header(ops_dir, count):
+        print(f"InfiniCore Operator Test Runner")
+        print(f"Directory: {ops_dir}")
+        print(f"Tests found: {count}\n")
+
+    @staticmethod
+    def print_live_result(result, verbose=False):
+        """Print single-line result in real-time."""
+        print(f"{result.status_icon} {result.name}: {result.status_text} (code: {result.return_code})")
+        if result.stdout:
+            print(result.stdout.rstrip())
+        if result.stderr:
+            print("\nSTDERR:", result.stderr.rstrip())
+        if result.error_message:
+            print(f"💥 Error: {result.error_message}")
+        if result.stdout or result.stderr or verbose:
+            print("-" * 40)
+
+    @staticmethod
+    def print_summary(results, cumulative_timing, total_expected=0):
+        """Prints the final comprehensive test summary and statistics, ensuring consistency with original output."""
+        print(f"\n{'=' * 80}\nCUMULATIVE TEST SUMMARY\n{'=' * 80}")
+
+        passed = [r for r in results if r.return_code == 0]
+        failed = [r for r in results if r.return_code == -1]
+        skipped = [r for r in results if r.return_code == -2]
+        partial = [r for r in results if r.return_code == -3]
+        total = len(results)
+
+        print(f"Total tests run: {total}")
+        print(f"Passed: {len(passed)}")
+        print(f"Failed: {len(failed)}")
+        if skipped:
+            print(f"Skipped: {len(skipped)}")
+        if partial:
+            print(f"Partial: {len(partial)}")
+
+        # 1. Print Benchmark data
+        if cumulative_timing:
+            # Assuming bench_mode is "both" for simplicity in this file, or passed via a config
+            # We call the modified _print_timing to handle the display logic.
+            TestReporter._print_timing(cumulative_timing, bench_mode="both")
+
+        # 2. Restore PASSED OPERATORS list
+        if passed:
+            print(f"\n✅ PASSED OPERATORS ({len(passed)}):")
+            # Print operators, grouped (assuming 10 per line as per the old pattern)
+            operators = [r.name for r in passed]
+            for i in range(0, len(operators), 10):
+                print("  " + ", ".join(operators[i : i + 10]))
+        else:
+            print(f"\n✅ PASSED OPERATORS: None")
+
+        # 3. Restore Success Rate
+        if total > 0:
+            # Calculate success rate based on actually executed tests (excluding skipped)
+            executed_tests = total - len(skipped)
+            if executed_tests > 0:
+                success_rate = len(passed) / executed_tests * 100
+                print(f"\nSuccess rate: {success_rate:.1f}%")
+
+        if not failed:
+            print(f"\n🎉 All tests passed!")
+        else:
+            print(f"\n❌ {len(failed)} tests failed")
+
+        return len(failed) == 0
+
+    # --- Internal Helpers ---
+    @staticmethod
+    def _print_timing(t, bench_mode="both"):
+        """Prints detailed timing breakdown for host and device, based on bench_mode."""
+        print(f"{'-' * 40}")
+        # Restore Operators Tested field using the new dataclass field
+        if hasattr(t, 'operators_tested'):
+            print(f"BENCHMARK SUMMARY:")
+            print(f"  Operators Tested: {t.operators_tested}")
+
+        # Restore detailed Host/Device distinction
+        if bench_mode in ["host", "both"]:
+            print(f"  PyTorch Host Total Time:    {t.torch_host:12.3f} ms")
+            print(f"  InfiniCore Host Total Time: {t.infini_host:12.3f} ms")
+        if bench_mode in ["device", "both"]:
+            print(f"  PyTorch Device Total Time:    {t.torch_device:12.3f} ms")
+            print(f"  InfiniCore Device Total Time: {t.infini_device:12.3f} ms")
+        print(f"{'-' * 40}")
+
+    @staticmethod
+    def _write_smart_field(f, key, value, indent, sub_indent, close_comma=""):
+        """
...
```
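The kwargs-name fix in this file boils down to one fallback rule: use the spec's own name when it is present and non-empty, otherwise synthesize a positional one, so the saved report never contains null names. A standalone sketch of that rule (the Spec stand-in is hypothetical, not the real TensorSpec):

```python
# Minimal stand-in for a tensor spec; only the "name" attribute matters here.
class Spec:
    def __init__(self, name=None):
        self.name = name

inputs = [Spec("a_spec"), Spec(None), Spec("")]

# The same fallback the new reporter applies per input (and per output with "out_{i}"):
names = [getattr(inp, "name", None) or f"in_{i}" for i, inp in enumerate(inputs)]
print(names)  # ['a_spec', 'in_1', 'in_2']: missing and empty names fall back
```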
test/infinicore/run.py
```diff
 import os
 import sys
 import argparse
 import traceback
 from pathlib import Path
 import importlib.util

+# Import components from the unified framework package
+from framework.loader import TestDiscoverer
+from framework.executor import SingleTestExecutor
+from framework.reporter import TestReporter
+from framework.datatypes import TestTiming
+from framework import get_hardware_args_group, add_common_test_args

-
-def find_ops_directory(location=None):
-    """
-    Find the ops directory by searching from location upwards.
-
-    Args:
-        location: Starting directory for search (default: current file's parent)
-
-    Returns:
-        Path: Path to ops directory or None if not found
-    """
-    if location is None:
-        location = Path(__file__).parent / "ops"
-
-    ops_dir = location.resolve()
-    if ops_dir.exists() and any(ops_dir.glob("*.py")):
-        return ops_dir
-    return None
-
-
-def get_available_operators(ops_dir):
-    """
-    Get list of available operators from ops directory.
-
-    Args:
-        ops_dir: Path to ops directory
-
-    Returns:
-        List of operator names
-    """
-    if not ops_dir or not ops_dir.exists():
-        return []
-
-    test_files = list(ops_dir.glob("*.py"))
-    current_script = Path(__file__).name
-    test_files = [f for f in test_files if f.name != current_script]
-
-    operators = []
-    for test_file in test_files:
-        try:
-            with open(test_file, "r", encoding="utf-8") as f:
-                content = f.read()
-            if "infinicore" in content and (
-                "BaseOperatorTest" in content or "GenericTestRunner" in content
-            ):
-                operators.append(test_file.stem)
-        except:
-            continue
-
-    return sorted(operators)
-
-
-def import_operator_test(test_file_path):
-    """
-    Import an operator test module and return the test class instance.
-
-    Args:
-        test_file_path: Path to the test file
-
-    Returns:
-        tuple: (success, test_instance_or_error)
-    """
-    try:
-        # Create a unique module name
-        module_name = f"op_test_{test_file_path.stem}"
-
-        # Load the module from file
-        spec = importlib.util.spec_from_file_location(module_name, test_file_path)
-        if spec is None or spec.loader is None:
-            return False, f"Could not load module from {test_file_path}"
-
-        module = importlib.util.module_from_spec(spec)
-
-        # Add the module to sys.modules
-        sys.modules[module_name] = module
-
-        # Execute the module
-        spec.loader.exec_module(module)
-
-        # Find the test class (usually named OpTest)
-        test_class = None
-        for attr_name in dir(module):
-            attr = getattr(module, attr_name)
-            if (
-                isinstance(attr, type)
-                and hasattr(attr, "__bases__")
-                and any("BaseOperatorTest" in str(base) for base in attr.__bases__)
-            ):
-                test_class = attr
-                break
-
-        if test_class is None:
-            return False, f"No test class found in {test_file_path}"
-
-        # Create an instance
-        test_instance = test_class()
-        return True, test_instance
-
-    except Exception as e:
-        return False, f"Error importing {test_file_path}: {str(e)}"
-
-
-def run_all_op_tests(
-    ops_dir=None,
-    specific_ops=None,
-    bench=False,
-    bench_mode="both",
-    verbose=False,
-    debug=False,
-):
-    """
-    Run all operator test scripts in the ops directory using direct import.
-
-    Args:
-        ops_dir (str, optional): Path to the ops directory. If None, uses auto-detection.
-        specific_ops (list, optional): List of specific operator names to test.
-        bench (bool): Whether benchmarking is enabled
-        bench_mode (str): Benchmark mode - "host", "device", or "both"
-        verbose (bool): Whether verbose mode is enabled
-
-    Returns:
-        dict: Results dictionary with test names as keys and (success, test_runner, stdout, stderr) as values.
-    """
-    if ops_dir is None:
-        ops_dir = find_ops_directory()
-    else:
-        ops_dir = Path(ops_dir)
-
-    if not ops_dir or not ops_dir.exists():
-        print(f"Error: Ops directory '{ops_dir}' does not exist.")
-        return {}
-
-    print(f"Looking for test files in: {ops_dir}")
-
-    # Find all Python test files
-    test_files = list(ops_dir.glob("*.py"))
-
-    # Filter out this script itself and non-operator test files
-    current_script = Path(__file__).name
-    test_files = [f for f in test_files if f.name != current_script]
-
-    # Filter to include only files that look like operator tests
-    operator_test_files = []
-    for test_file in test_files:
-        try:
-            with open(test_file, "r", encoding="utf-8") as f:
-                content = f.read()
-            # Look for characteristic patterns of operator tests
-            if "infinicore" in content and (
-                "BaseOperatorTest" in content or "GenericTestRunner" in content
-            ):
-                operator_test_files.append(test_file)
-        except Exception as e:
-            continue
-
-    # Filter for specific operators if requested
-    if specific_ops:
-        filtered_files = []
-        for test_file in operator_test_files:
-            test_name = test_file.stem.lower()
-            if any(op.lower() == test_name for op in specific_ops):
-                filtered_files.append(test_file)
-        operator_test_files = filtered_files
-
-    if not operator_test_files:
-        print(f"No operator test files found in {ops_dir}")
-        print(f"Available Python files: {[f.name for f in test_files]}")
-        return {}
-
-    print(f"Found {len(operator_test_files)} operator test files:")
-    for test_file in operator_test_files:
-        print(f"  - {test_file.name}")
-
-    results = {}
-    cumulative_timing = {
-        "total_torch_host_time": 0.0,
-        "total_torch_device_time": 0.0,
-        "total_infinicore_host_time": 0.0,
-        "total_infinicore_device_time": 0.0,
-        "operators_tested": 0,
-    }
-
-    for test_file in operator_test_files:
-        test_name = test_file.stem
-        try:
-            # Import and run the test directly
-            success, test_instance_or_error = import_operator_test(test_file)
-            if not success:
-                print(f"💥 {test_name}: ERROR - {test_instance_or_error}")
-                results[test_name] = {
-                    "success": False,
-                    "return_code": -1,
-                    "torch_host_time": 0.0,
-                    "torch_device_time": 0.0,
-                    "infini_host_time": 0.0,
-                    "infini_device_time": 0.0,
-                    "error_message": test_instance_or_error,
-                    "test_runner": None,
-                    "stdout": "",
-                    "stderr": test_instance_or_error,
-                }
-                continue
-
-            # Get the test runner class from the module
-            test_module = sys.modules[f"op_test_{test_file.stem}"]
-            if not hasattr(test_module, "GenericTestRunner"):
-                print(f"💥 {test_name}: ERROR - No GenericTestRunner found")
-                results[test_name] = {
-                    "success": False,
-                    "return_code": -1,
-                    "torch_host_time": 0.0,
-                    "torch_device_time": 0.0,
-                    "infini_host_time": 0.0,
-                    "infini_device_time": 0.0,
-                    "error_message": "No GenericTestRunner found",
-                    "test_runner": None,
-                    "stdout": "",
-                    "stderr": "No GenericTestRunner found",
-                }
-                continue
-
-            # Create and run the test runner
-            test_runner_class = test_module.GenericTestRunner
-            runner_instance = test_runner_class(test_instance_or_error.__class__)
-
-            # Temporarily redirect stdout to capture output
-            from io import StringIO
-
-            stdout_capture = StringIO()
-            stderr_capture = StringIO()
-            old_stdout = sys.stdout
-            old_stderr = sys.stderr
-            sys.stdout = stdout_capture
-            sys.stderr = stderr_capture
-
-            try:
-                # Run the test
-                test_success, test_runner = runner_instance.run()
-
-                # Get captured output
-                stdout_output = stdout_capture.getvalue()
-                stderr_output = stderr_capture.getvalue()
-
-                # Restore stdout/stderr
-                sys.stdout = old_stdout
-                sys.stderr = old_stderr
-
-                # Print the captured output
-                if stdout_output:
-                    print(stdout_output.rstrip())
-                if stderr_output:
-                    print("\nSTDERR:")
-                    print(stderr_output.rstrip())
-
-                # Analyze test results
-                test_results = test_runner.get_test_results() if test_runner else []
-
-                # Determine overall test status
-                if test_success:
-                    return_code = 0
-                    status_icon = "✅"
-                    status_text = "PASSED"
-                else:
-                    # Check if there are any failed tests
-                    has_failures = any(result.return_code == -1 for result in test_results)
-                    has_partial = any(result.return_code == -3 for result in test_results)
-                    has_skipped = any(result.return_code == -2 for result in test_results)
-
-                    if has_failures:
-                        return_code = -1
-                        status_icon = "❌"
-                        status_text = "FAILED"
-                    elif has_partial:
-                        return_code = -3
-                        status_icon = "⚠️"
-                        status_text = "PARTIAL"
-                    elif has_skipped:
-                        return_code = -2
-                        status_icon = "⏭️"
-                        status_text = "SKIPPED"
-                    else:
-                        return_code = -1
-                        status_icon = "❌"
-                        status_text = "FAILED"
-
-                # Calculate timing for all four metrics
-                torch_host_time = sum(result.torch_host_time for result in test_results)
-                torch_device_time = sum(result.torch_device_time for result in test_results)
-                infini_host_time = sum(result.infini_host_time for result in test_results)
-                infini_device_time = sum(result.infini_device_time for result in test_results)
-
-                results[test_name] = {
-                    "success": test_success,
-                    "return_code": return_code,
-                    "torch_host_time": torch_host_time,
-                    "torch_device_time": torch_device_time,
-                    "infini_host_time": infini_host_time,
-                    "infini_device_time": infini_device_time,
-                    "error_message": "",
-                    "test_runner": test_runner,
-                    "stdout": stdout_output,
-                    "stderr": stderr_output,
-                }
-
-                print(f"{status_icon} {test_name}: {status_text} (return code: {return_code})")
-
-                # Extract benchmark timing if in bench mode
-                if bench and test_success and return_code == 0:
-                    cumulative_timing["total_torch_host_time"] += torch_host_time
-                    cumulative_timing["total_torch_device_time"] += torch_device_time
-                    cumulative_timing["total_infinicore_host_time"] += infini_host_time
-                    cumulative_timing["total_infinicore_device_time"] += infini_device_time
-                    cumulative_timing["operators_tested"] += 1
-
-            except Exception as e:
-                # Restore stdout/stderr in case of exception
-                sys.stdout = old_stdout
-                sys.stderr = old_stderr
-                raise e
-
-            # In verbose mode, stop execution on first failure
-            if verbose and not test_success and return_code != 0:
-                break
-
-        except Exception as e:
-            print(f"💥 {test_name}: ERROR - {str(e)}")
-            results[test_name] = {
-                "success": False,
-                "return_code": -1,
-                "torch_host_time": 0.0,
-                "torch_device_time": 0.0,
-                "infini_host_time": 0.0,
-                "infini_device_time": 0.0,
-                "error_message": str(e),
-                "test_runner": None,
-                "stdout": "",
-                "stderr": str(e),
-            }
-
-            # In verbose mode, stop execution on any exception
-            if verbose:
-                print(f"\n{'!' * 60}")
-                print(f"VERBOSE MODE: Stopping execution due to exception in {test_name}")
-                print(f"{'!' * 60}")
-                break
-
-            if debug:
-                traceback.print_exc()
-                break
-
-    return results, cumulative_timing
-
-
-def print_summary(
-    results,
-    verbose=False,
-    total_expected_tests=0,
-    cumulative_timing=None,
-    bench_mode="both",
-):
-    """Print a comprehensive summary of test results including benchmark data."""
-    print(f"\n{'=' * 80}")
-    print("CUMULATIVE TEST SUMMARY")
-    print(f"{'=' * 80}")
-
-    if not results:
-        print("No tests were run.")
-        return False
-
-    # Count different types of results
-    passed = 0
-    failed = 0
-    skipped = 0
-    partial = 0
-    passed_operators = []  # Store passed operator names
-    failed_operators = []  # Store failed operator names
-    skipped_operators = []  # Store skipped operator names
-    partial_operators = []  # Store partial operator names
-
-    for test_name, result_data in results.items():
-        return_code = result_data["return_code"]
-        if return_code == 0:
-            passed += 1
-            passed_operators.append(test_name)
-        elif return_code == -2:  # Special code for skipped tests
-            skipped += 1
-            skipped_operators.append(test_name)
-        elif return_code == -3:  # Special code for partial tests
-            partial += 1
-            partial_operators.append(test_name)
-        else:
-            failed += 1
-            failed_operators.append(test_name)
-
-    total = len(results)
-    print(f"Total tests run: {total}")
-    if total_expected_tests > 0 and total < total_expected_tests:
-        print(f"Total tests expected: {total_expected_tests}")
-        print(f"Tests not executed: {total_expected_tests - total}")
-    print(f"Passed: {passed}")
-    print(f"Failed: {failed}")
-    if skipped > 0:
-        print(f"Skipped: {skipped}")
-    if partial > 0:
-        print(f"Partial: {partial}")
-
-    # Print benchmark summary if cumulative_timing data is available
-    if cumulative_timing and cumulative_timing["operators_tested"] > 0:
-        print(f"{'-' * 40}")
-        print("BENCHMARK SUMMARY:")
-        print(f"  Operators Tested: {cumulative_timing['operators_tested']}")
-
-        # Display timing based on bench_mode
-        if bench_mode in ["host", "both"]:
-            print(f"  PyTorch Host Total Time:    {cumulative_timing['total_torch_host_time']:12.3f} ms")
-            print(f"  InfiniCore Host Total Time: {cumulative_timing['total_infinicore_host_time']:12.3f} ms")
-        if bench_mode in ["device", "both"]:
-            print(f"  PyTorch Device Total Time:    {cumulative_timing['total_torch_device_time']:12.3f} ms")
-            print(f"  InfiniCore Device Total Time: {cumulative_timing['total_infinicore_device_time']:12.3f} ms")
-        print(f"{'-' * 40}")
-
-    # Display passed operators
-    if passed_operators:
-        print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):")
-        # Display operators in groups of 10 per line
-        for i in range(0, len(passed_operators), 10):
-            line_ops = passed_operators[i : i + 10]
-            print("  " + ", ".join(line_ops))
-    else:
-        print(f"\n✅ PASSED OPERATORS: None")
-
-    # Display failed operators (if any)
-    if failed_operators:
-        print(f"\n❌ FAILED OPERATORS ({len(failed_operators)}):")
-        for i in range(0, len(failed_operators), 10):
-            line_ops = failed_operators[i : i + 10]
-            print("  " + ", ".join(line_ops))
-
-    # Display skipped operators (if any)
-    if skipped_operators:
-        print(f"\n⏭️ SKIPPED OPERATORS ({len(skipped_operators)}):")
-        for i in range(0, len(skipped_operators), 10):
-            line_ops = skipped_operators[i : i + 10]
-            print("  " + ", ".join(line_ops))
-
-    # Display partial operators (if any)
-    if partial_operators:
-        print(f"\n⚠️ PARTIAL OPERATORS ({len(partial_operators)}):")
-        for i in range(0, len(partial_operators), 10):
-            line_ops = partial_operators[i : i + 10]
-            print("  " + ", ".join(line_ops))
-
-    if total > 0:
-        # Calculate success rate based on actual executed tests
-        executed_tests = passed + failed + partial
-        if executed_tests > 0:
-            success_rate = passed / executed_tests * 100
-            print(f"\nSuccess rate: {success_rate:.1f}%")
-
-    if verbose and total < total_expected_tests:
-        print(f"\n💡 Verbose mode: Execution stopped after first failure")
-        print(f"   {total_expected_tests - total} tests were not executed")
-
-    if failed == 0:
-        if skipped > 0 or partial > 0:
-            print(f"\n⚠️ Tests completed with some operators not implemented")
-            print(f"   - {skipped} tests skipped (both operators not implemented)")
-            print(f"   - {partial} tests partial (one operator not implemented)")
-        else:
-            print(f"\n🎉 All tests passed!")
-        return True
-    else:
-        print(f"\n❌ {failed} tests failed")
-        return False
-
-
-def list_available_tests(ops_dir=None):
-    """List all available operator test files."""
-    if ops_dir is None:
-        ops_dir = find_ops_directory()
-    else:
-        ops_dir = Path(ops_dir)
-
-    if not ops_dir or not ops_dir.exists():
-        print(f"Error: Ops directory '{ops_dir}' does not exist.")
-        return
-
-    operators = get_available_operators(ops_dir)
-    if operators:
-        print(f"Available operator test files in {ops_dir}:")
-        for operator in operators:
-            print(f"  - {operator}")
-        print(f"\nTotal: {len(operators)} operators")
-    else:
-        print(f"No operator test files found in {ops_dir}")
-        # Show available Python files for debugging
-        test_files = list(ops_dir.glob("*.py"))
-        current_script = Path(__file__).name
-        test_files = [f for f in test_files if f.name != current_script]
-        if test_files:
-            print(f"Available Python files: {[f.name for f in test_files]}")
-
-
-def generate_help_epilog(ops_dir):
-    """
-    Generate dynamic help epilog with available operators and hardware platforms.
-
-    Args:
-        ops_dir: Path to ops directory
-
-    Returns:
-        str: Formatted help text
-    """
-    # Get available operators
-    operators = get_available_operators(ops_dir)
-
-    # Build epilog text
-    epilog_parts = []
-
-    # Examples section
-    epilog_parts.append("Examples:")
-    epilog_parts.append("  # Run all operator tests on CPU")
-    epilog_parts.append("  python run.py --cpu")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run specific operators")
-    epilog_parts.append("  python run.py --ops add matmul --nvidia")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run with debug mode on multiple devices")
-    epilog_parts.append("  python run.py --cpu --nvidia --debug")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run with verbose mode to stop on first error with full traceback")
-    epilog_parts.append("  python run.py --cpu --nvidia --verbose")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run with benchmarking (both host and device timing)")
-    epilog_parts.append("  python run.py --cpu --bench")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run with host timing only")
-    epilog_parts.append("  python run.py --nvidia --bench host")
-    epilog_parts.append("")
-    epilog_parts.append("  # Run with device timing only")
-    epilog_parts.append("  python run.py --nvidia --bench device")
-    epilog_parts.append("")
-    epilog_parts.append("  # List available tests without running")
-    epilog_parts.append("  python run.py --list")
-    epilog_parts.append("")
-
-    # Available operators section
-    if operators:
-        epilog_parts.append("Available Operators:")
-        # Group operators for better display
-        operators_per_line = 4
-        for i in range(0, len(operators), operators_per_line):
-            line_ops = operators[i : i + operators_per_line]
-            epilog_parts.append(f"  {', '.join(line_ops)}")
-        epilog_parts.append("")
-    else:
-        epilog_parts.append("Available Operators: (none detected)")
-        epilog_parts.append("")
-
-    # Additional notes
-    epilog_parts.append("Note:")
-    epilog_parts.append("  - Use '--' to pass additional arguments to individual test scripts")
-    epilog_parts.append("  - Operators are automatically discovered from the ops directory")
-    epilog_parts.append("  - --bench mode now shows cumulative timing across all operators")
-    epilog_parts.append("  - --bench host/device/both controls host/device timing measurement")
-    epilog_parts.append("  - --verbose mode stops execution on first error and shows full traceback")
-
-    return "\n".join(epilog_parts)
-
-
 def main():
-    """Main entry point with comprehensive command line argument parsing."""
-    # First, find ops directory for dynamic help generation
-    ops_dir = find_ops_directory()
-
-    parser = argparse.ArgumentParser(
-        description="Run InfiniCore operator tests across multiple hardware platforms",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog=generate_help_epilog(ops_dir),
-    )
-
-    # Core options
-    parser.add_argument("--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)")
-    parser.add_argument("--ops", nargs="+", help="Run specific operators only (e.g., --ops add matmul)")
-    parser.add_argument(
-        "--list",
-        action="store_true",
-        help="List all available test files without running them",
-    )
+    """Main entry point for the InfiniCore Operator Test Runner."""
+    parser = argparse.ArgumentParser(
+        description="Run InfiniCore operator tests across multiple hardware platforms"
+    )
+    parser.add_argument("--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)")
+    parser.add_argument("--ops", nargs="+", help="Run specific operators only (e.g., --ops add matmul)")
+    parser.add_argument("--list", action="store_true", help="List all available test files without running them")

-    # Call common method to add shared arguments (bench, debug, verbose, save...)
+    # Add common test arguments (including --save, --bench, etc.)
     add_common_test_args(parser)
     get_hardware_args_group(parser)
-    # Parse known args first, leave the rest for the test scripts
-    args, unknown_args = parser.parse_known_args()
+    args, _ = parser.parse_known_args()

-    # Handle list command
+    # 1. Discovery
+    discoverer = TestDiscoverer(args.ops_dir)
     if args.list:
-        list_available_tests(args.ops_dir)
+        print("Available operators:", discoverer.get_available_operators())
         return

-    # Auto-detect ops directory if not provided
-    if args.ops_dir is None:
-        ops_dir = find_ops_directory()
-        if not ops_dir:
-            print("Error: Could not auto-detect ops directory. Please specify with --ops-dir")
-            sys.exit(1)
-    else:
-        ops_dir = Path(args.ops_dir)
-        if not ops_dir.exists():
-            print(f"Error: Ops directory '{ops_dir}' does not exist.")
-            sys.exit(1)
-
-    # Show what extra arguments will be passed
-    if unknown_args:
-        print(f"Passing extra arguments to test scripts: {unknown_args}")
-
-    # Get available operators for display
-    available_operators = get_available_operators(ops_dir)
-
-    print(f"InfiniCore Operator Test Runner")
-    print(f"Operating directory: {ops_dir}")
-    print(f"Available operators: {len(available_operators)}")
-
-    if args.verbose:
-        print(f"Verbose mode: ENABLED (will stop on first error with full traceback)")
-
-    if args.bench:
-        bench_mode = args.bench if args.bench != "both" else "both"
-        print(f"Benchmark mode: {bench_mode.upper()} timing")
-
-    if args.ops:
-        # Validate requested operators
-        valid_ops = []
-        invalid_ops = []
-        for op in args.ops:
-            if op in available_operators:
-                valid_ops.append(op)
-            else:
-                invalid_ops.append(op)
-
-        if invalid_ops:
-            print(f"Warning: Unknown operators: {', '.join(invalid_ops)}")
-            print(f"Available operators: {', '.join(available_operators)}")
-
-        if valid_ops:
-            print(f"Testing operators: {', '.join(valid_ops)}")
-            total_expected_tests = len(valid_ops)
-        else:
-            print("No valid operators specified. Running all available tests.")
-            total_expected_tests = len(available_operators)
-    else:
-        print("Testing all available operators")
-        total_expected_tests = len(available_operators)
-
-    print()
-
-    # Run all tests
-    results, cumulative_timing = run_all_op_tests(
-        ops_dir=ops_dir,
-        specific_ops=args.ops,
-        bench=bool(args.bench),
-        bench_mode=args.bench if args.bench else "both",
-        verbose=args.verbose,
-        debug=args.debug,
-    )
-
-    # Print summary and exit with appropriate code
-    all_passed = print_summary(
-        results,
-        args.verbose,
-        total_expected_tests,
-        cumulative_timing,
-        bench_mode=args.bench if args.bench else "both",
-    )
-
-    # Check if there were any tests with missing implementations
-    has_missing_implementations = any(
-        result_data["return_code"] in [-2, -3]
-        for result_data in results.values()
-    )
+    test_files = discoverer.scan(args.ops)
+    if not test_files:
+        print("No tests found.")
+        sys.exit(0)

+    # 2. Preparation
+    executor = SingleTestExecutor()
+    cumulative_timing = TestTiming()
+    results = []
+    TestReporter.print_header(discoverer.ops_dir, len(test_files))
+
+    # 3. Execution Loop
+    for f in test_files:
+        result = executor.run(f)
+        results.append(result)
+
+        # Real-time reporting and printing of stdout
+        TestReporter.print_live_result(result, verbose=args.verbose)
+
+        # Accumulate timing
+        if result.success:
+            cumulative_timing.torch_host += result.timing.torch_host
+            cumulative_timing.infini_host += result.timing.infini_host
+            cumulative_timing.torch_device += result.timing.torch_device
+            cumulative_timing.infini_device += result.timing.infini_device
+
+        # Fail fast in verbose mode
+        if args.verbose and not result.success:
+            print("\nStopping due to failure in verbose mode.")
+            break
+
+    # 4. Final Report & Save
+    all_passed = TestReporter.print_summary(
+        results, cumulative_timing if args.bench else None, total_expected=len(test_files)
+    )

-    if all_passed and has_missing_implementations:
-        print(f"\n⚠️ Note: Some operators are not fully implemented")
-        print(f"   Run individual tests for details on missing implementations")
-
-    if args.verbose and not all_passed:
-        print(f"\n💡 Verbose mode tip: Use individual test commands for detailed debugging:")
-        failed_ops = [
-            name
-            for name, result_data in results.items()
-            if result_data["return_code"] == -1
-        ]
-        for op in failed_ops[:3]:  # Show first 3 failed operators
-            print(f"   python {ops_dir / (op + '.py')} --verbose")
-
     sys.exit(0 if all_passed else 1)


 if __name__ == "__main__":
     main()
```
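One practical consequence of the split: per-test results change shape from the old ad-hoc dicts in run.py to SingleTestResult objects, so anything consuming them indexes fields instead of keys. A hedged before/after sketch (values invented for illustration; assumes the framework package is importable):

```python
from framework.datatypes import SingleTestResult, TestTiming

# Old run.py shape: results was a dict of dicts keyed by operator name.
old_entry = {"success": True, "return_code": 0, "torch_host_time": 1.25}

# New shape: results is a list of SingleTestResult objects.
new_entry = SingleTestResult(
    name="add",                          # the dict key becomes an explicit field
    success=True,
    return_code=0,
    timing=TestTiming(torch_host=1.25),  # the *_time keys move into TestTiming
)
assert new_entry.return_code == old_entry["return_code"]
```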