".github/git@developer.sourcefind.cn:tsoc/superbenchmark.git" did not exist on "425b9ff8651b683cda504ef610a8577e8ac7332f"
Commit 0d58c820 authored by baominghelly

issue/787 - Split run ops test logic and fix kwargs name in report

parent 726eacf8
import torch
import infinicore
from dataclasses import dataclass, field
def to_torch_dtype(infini_dtype):
"""Convert infinicore data type to PyTorch data type"""
@@ -60,3 +60,37 @@ def to_infinicore_dtype(torch_dtype):
return infinicore.complex128
else:
raise ValueError(f"Unsupported torch dtype: {torch_dtype}")
@dataclass
class TestTiming:
"""Stores performance testing timing metrics."""
torch_host: float = 0.0
torch_device: float = 0.0
infini_host: float = 0.0
infini_device: float = 0.0
operators_tested: int = 0
@dataclass
class SingleTestResult:
"""Stores the execution results of a single test file."""
name: str
success: bool = False
return_code: int = -1
error_message: str = ""
stdout: str = ""
stderr: str = ""
timing: TestTiming = field(default_factory=TestTiming)
@property
def status_icon(self):
if self.return_code == 0: return "✅"
if self.return_code == -2: return "⏭️"
if self.return_code == -3: return "⚠️"
return "❌"
@property
def status_text(self):
if self.return_code == 0: return "PASSED"
if self.return_code == -2: return "SKIPPED"
if self.return_code == -3: return "PARTIAL"
return "FAILED"
import sys
import importlib.util
from io import StringIO
from contextlib import contextmanager
from .datatypes import SingleTestResult, TestTiming
@contextmanager
def capture_output():
"""Context manager: captures stdout and stderr."""
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield new_out, new_err
finally:
sys.stdout, sys.stderr = old_out, old_err
class SingleTestExecutor:
def run(self, file_path) -> SingleTestResult:
result = SingleTestResult(name=file_path.stem)
try:
# 1. Dynamically import the module
module = self._import_module(file_path)
# 2. Look for TestRunner
if not hasattr(module, "GenericTestRunner"):
raise ImportError("No GenericTestRunner found in module")
# 3. Look for TestClass (subclass of BaseOperatorTest)
test_class = self._find_test_class(module)
if not test_class:
raise ImportError("No BaseOperatorTest subclass found")
test_instance = test_class()
runner_class = module.GenericTestRunner
runner = runner_class(test_instance.__class__)
# 4. Execute and capture output
with capture_output() as (out, err):
success, internal_runner = runner.run()
# 5. Populate results
result.success = success
result.stdout = out.getvalue()
result.stderr = err.getvalue()
# Extract detailed results from internal_runner
test_results = internal_runner.get_test_results() if internal_runner else []
self._analyze_return_code(result, test_results)
self._extract_timing(result, test_results)
except Exception as e:
result.success = False
result.error_message = str(e)
result.stderr += f"\nExecutor Error: {str(e)}"
result.return_code = -1
return result
def _import_module(self, path):
module_name = f"op_test_{path.stem}"
spec = importlib.util.spec_from_file_location(module_name, path)
if not spec or not spec.loader:
raise ImportError(f"Could not load spec from {path}")
module = importlib.util.module_from_spec(spec)
sys.modules[module_name] = module
spec.loader.exec_module(module)
return module
def _find_test_class(self, module):
for attr_name in dir(module):
attr = getattr(module, attr_name)
if isinstance(attr, type) and hasattr(attr, "__bases__"):
# Simple check for base class name
if any("BaseOperatorTest" in str(b) for b in attr.__bases__):
return attr
return None
def _analyze_return_code(self, result, test_results):
# Logic consistent with original code: determine if all passed, partially passed, or skipped
if not result.success:
result.return_code = -1
return
codes = [r.return_code for r in test_results]
if -1 in codes: result.return_code = -1
elif -3 in codes: result.return_code = -3
elif -2 in codes: result.return_code = -2
else: result.return_code = 0
def _extract_timing(self, result, test_results):
# Accumulate timing
t = result.timing
t.torch_host = sum(r.torch_host_time for r in test_results)
t.torch_device = sum(r.torch_device_time for r in test_results)
t.infini_host = sum(r.infini_host_time for r in test_results)
t.infini_device = sum(r.infini_device_time for r in test_results)
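# Usage sketch for the executor (the ops/add.py path is hypothetical; the
# framework.executor module path is taken from the run.py imports below).
# Return-code precedence in _analyze_return_code is: -1 (failed) over
# -3 (partial) over -2 (skipped) over 0 (passed).
#
#     from pathlib import Path
#     from framework.executor import SingleTestExecutor, capture_output
#
#     executor = SingleTestExecutor()
#     result = executor.run(Path("ops/add.py"))    # hypothetical test file
#     print(result.status_icon, result.name, result.return_code)
#
#     # capture_output also works on its own:
#     with capture_output() as (out, err):
#         print("hello")
#     assert out.getvalue() == "hello\n"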
from pathlib import Path
class TestDiscoverer:
def __init__(self, ops_dir_path=None):
self.ops_dir = self._resolve_dir(ops_dir_path)
def _resolve_dir(self, path):
if path:
p = Path(path)
if p.exists(): return p
# Default fallback logic: 'ops' directory under the parent of the current file's parent.
# Note: Since this file is in 'framework/', we look at parent.parent.
# It is recommended to pass an explicit path in run.py.
fallback = Path(__file__).parent.parent / "ops"
return fallback if fallback.exists() else None
def get_available_operators(self):
"""Returns a list of names of all available operators."""
if not self.ops_dir: return []
files = self.scan()
return sorted([f.stem for f in files])
def scan(self, specific_ops=None):
"""Scans and returns a list of Path objects that meet the criteria."""
if not self.ops_dir or not self.ops_dir.exists():
return []
# 1. Find all .py files
files = list(self.ops_dir.glob("*.py"))
# 2. Filter out non-test files (via content check)
valid_files = []
for f in files:
if f.name.startswith("_") or f.name == "run.py":
continue
if self._is_operator_test(f):
valid_files.append(f)
# 3. If specific operators are specified, filter them
if specific_ops:
return [f for f in valid_files if f.stem in specific_ops]
return valid_files
def _is_operator_test(self, file_path):
"""Checks if the file content contains operator test characteristics."""
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
return "infinicore" in content and (
"BaseOperatorTest" in content or "GenericTestRunner" in content
)
except (OSError, UnicodeDecodeError):
return False
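# Discovery sketch (the explicit "ops" path is hypothetical; with no argument
# the class falls back to the ops/ directory next to the framework package):
#
#     from framework.loader import TestDiscoverer
#
#     discoverer = TestDiscoverer("ops")
#     print(discoverer.get_available_operators())   # e.g. ["add", "matmul"]
#     for path in discoverer.scan(specific_ops=["add"]):
#         print(path.name)                          # only stems in the filter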
@@ -61,35 +61,61 @@ class TestReporter:
# --- B. Build Kwargs ---
display_kwargs = {}
# B1. Process existing kwargs
for k, v in tc.kwargs.items():
# 1. Handle Inplace output index: "out": 0 -> "out": "in_0" / "a_spec"
if k == "out" and isinstance(v, int):
if 0 <= v < len(tc.inputs):
# Prioritize the input's name; otherwise, default to index-based name
display_kwargs[k] = getattr(tc.inputs[v], "name", None) or f"in_{v}"
else:
display_kwargs[k] = f"Invalid_Index_{v}"
# 2. Handle TensorSpec objects
elif isinstance(v, TensorSpec):
spec_dict = TestReporter._spec_to_dict(v)
# If the object has a name, explicitly overwrite it; otherwise, keep original
if getattr(v, "name", None):
spec_dict["name"] = v.name
display_kwargs[k] = spec_dict
# 3. Direct assignment for other types
else:
display_kwargs[k] = v
# --- B2. Inject Outputs ---
# Handle output list (output_specs)
if getattr(tc, "output_specs", None):
for i, spec in enumerate(tc.output_specs):
display_kwargs[f"out_{i}"] = TestReporter._spec_to_dict(spec)
elif tc.output_spec:
if "out" not in display_kwargs:
display_kwargs["out"] = TestReporter._spec_to_dict(tc.output_spec)
out_dict = TestReporter._spec_to_dict(spec)
# Prioritize intrinsic name; otherwise, default to "out_i"
out_dict["name"] = getattr(spec, "name", None) or f"out_{i}"
display_kwargs[f"out_{i}"] = out_dict
# Handle single output (output_spec), preventing overwrite of existing "out"
elif tc.output_spec and "out" not in display_kwargs:
out_dict = TestReporter._spec_to_dict(tc.output_spec)
# Prioritize intrinsic name; otherwise, default to "out" (fixes null issue)
out_dict["name"] = getattr(tc.output_spec, "name", "out")
display_kwargs["out"] = out_dict
# --- C. Build Inputs ---
# Iterate inputs: prioritize original name, fallback to "in_i"
processed_inputs = []
for i, inp in enumerate(tc.inputs):
inp_dict = TestReporter._spec_to_dict(inp)
# Simplified logic: Use "name" attribute if present and non-empty, else use f"in_{i}"
inp_dict["name"] = getattr(inp, "name", None) or f"in_{i}"
processed_inputs.append(inp_dict)
# --- D. Build Test Case Dictionary ---
case_data = {
"description": tc.description,
"inputs": [TestReporter._spec_to_dict(i) for i in tc.inputs],
"inputs": processed_inputs,
"kwargs": display_kwargs,
"comparison_target": tc.comparison_target,
"tolerance": tc.tolerance,
}
# --- E. Inject Result ---
if res:
case_data["result"] = TestReporter._fmt_result(res)
@@ -117,7 +143,7 @@ class TestReporter:
indent_12 = ' ' * 12
indent_16 = ' ' * 16
indent_20 = ' ' * 20
print(f"💾 Saving to: {final_path}")
try:
with open(final_path, "w", encoding="utf-8") as f:
@@ -125,8 +151,8 @@ class TestReporter:
for i, entry in enumerate(total_results):
f.write(f"{indent_4}{{\n")
keys = list(entry.keys())
for j, key in enumerate(keys):
val = entry[key]
comma = "," if j < len(keys) - 1 else ""
@@ -204,7 +230,109 @@ class TestReporter:
import traceback; traceback.print_exc()
print(f" ❌ Save failed: {e}")
@staticmethod
def print_header(ops_dir, count):
print(f"InfiniCore Operator Test Runner")
print(f"Directory: {ops_dir}")
print(f"Tests found: {count}\n")
@staticmethod
def print_live_result(result, verbose=False):
"""Print single-line result in real-time."""
print(f"{result.status_icon} {result.name}: {result.status_text} (code: {result.return_code})")
if result.stdout:
print(result.stdout.rstrip())
if result.stderr:
print("\nSTDERR:", result.stderr.rstrip())
if result.error_message:
print(f"💥 Error: {result.error_message}")
if result.stdout or result.stderr or verbose:
print("-" * 40)
@staticmethod
def print_summary(results, cumulative_timing, total_expected=0):
"""Prints the final comprehensive test summary and statistics, ensuring consistency with original output."""
print(f"\n{'='*80}\nCUMULATIVE TEST SUMMARY\n{'='*80}")
passed = [r for r in results if r.return_code == 0]
failed = [r for r in results if r.return_code == -1]
skipped = [r for r in results if r.return_code == -2]
partial = [r for r in results if r.return_code == -3]
total = len(results)
print(f"Total tests run: {total}")
print(f"Passed: {len(passed)}")
print(f"Failed: {len(failed)}")
if skipped: print(f"Skipped: {len(skipped)}")
if partial: print(f"Partial: {len(partial)}")
# 1. Print Benchmark data
if cumulative_timing:
# Assuming bench_mode is "both" for simplicity in this file, or passed via a config
# We call the modified _print_timing to handle the display logic.
TestReporter._print_timing(cumulative_timing, bench_mode="both")
# 2. Restore PASSED OPERATORS list
if passed:
print(f"\n✅ PASSED OPERATORS ({len(passed)}):")
# Print operators, grouped (assuming 10 per line as per the old pattern)
operators = [r.name for r in passed]
for i in range(0, len(operators), 10):
print(" " + ", ".join(operators[i : i + 10]))
else:
print(f"\n✅ PASSED OPERATORS: None")
# 3. Restore Success Rate
if total > 0:
# Calculate success rate based on actually executed tests (excluding skipped)
executed_tests = total - len(skipped)
if executed_tests > 0:
success_rate = len(passed) / executed_tests * 100
print(f"\nSuccess rate: {success_rate:.1f}%")
if not failed:
print(f"\n🎉 All tests passed!")
else:
print(f"\n{len(failed)} tests failed")
return len(failed) == 0
# --- Internal Helpers ---
@staticmethod
def _print_timing(t, bench_mode="both"):
"""Prints detailed timing breakdown for host and device, based on bench_mode."""
print(f"{'-'*40}")
# Restore Operators Tested field using the new dataclass field
if hasattr(t, 'operators_tested'):
print(f"BENCHMARK SUMMARY:")
print(f" Operators Tested: {t.operators_tested}")
# Restore detailed Host/Device distinction
if bench_mode in ["host", "both"]:
print(
f" PyTorch Host Total Time: {t.torch_host:12.3f} ms"
)
print(
f" InfiniCore Host Total Time: {t.infini_host:12.3f} ms"
)
if bench_mode in ["device", "both"]:
print(
f" PyTorch Device Total Time: {t.torch_device:12.3f} ms"
)
print(
f" InfiniCore Device Total Time: {t.infini_device:12.3f} ms"
)
print(f"{'-'*40}")
@staticmethod
def _write_smart_field(f, key, value, indent, sub_indent, close_comma=""):
"""
......
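# Summary sketch wiring reporter and datatypes together (fabricated results,
# purely illustrative):
#
#     from framework.datatypes import SingleTestResult, TestTiming
#     from framework.reporter import TestReporter
#
#     results = [
#         SingleTestResult(name="add", success=True, return_code=0),
#         SingleTestResult(name="mul", success=False, return_code=-1),
#     ]
#     timing = TestTiming(torch_host=2.5, infini_host=1.8, operators_tested=1)
#     all_passed = TestReporter.print_summary(results, timing, total_expected=2)
#     assert all_passed is False                   # one operator failed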
import os
import sys
import argparse
import traceback
from pathlib import Path
import importlib.util
# Import components from the unified framework package
from framework.loader import TestDiscoverer
from framework.executor import SingleTestExecutor
from framework.reporter import TestReporter
from framework.datatypes import TestTiming
from framework import get_hardware_args_group, add_common_test_args
def find_ops_directory(location=None):
"""
Find the ops directory.
Args:
location: Candidate directory to check (default: the 'ops' folder next to this file)
Returns:
Path: Path to the ops directory, or None if it does not exist or contains no .py files
"""
if location is None:
location = Path(__file__).parent / "ops"
ops_dir = location.resolve()
if ops_dir.exists() and any(ops_dir.glob("*.py")):
return ops_dir
return None
def get_available_operators(ops_dir):
"""
Get list of available operators from ops directory.
Args:
ops_dir: Path to ops directory
Returns:
List of operator names
"""
if not ops_dir or not ops_dir.exists():
return []
test_files = list(ops_dir.glob("*.py"))
current_script = Path(__file__).name
test_files = [f for f in test_files if f.name != current_script]
operators = []
for test_file in test_files:
try:
with open(test_file, "r", encoding="utf-8") as f:
content = f.read()
if "infinicore" in content and (
"BaseOperatorTest" in content or "GenericTestRunner" in content
):
operators.append(test_file.stem)
except (OSError, UnicodeDecodeError):
continue
return sorted(operators)
def import_operator_test(test_file_path):
"""
Import an operator test module and return the test class instance.
Args:
test_file_path: Path to the test file
Returns:
tuple: (success, test_instance_or_error)
"""
try:
# Create a unique module name
module_name = f"op_test_{test_file_path.stem}"
# Load the module from file
spec = importlib.util.spec_from_file_location(module_name, test_file_path)
if spec is None or spec.loader is None:
return False, f"Could not load module from {test_file_path}"
module = importlib.util.module_from_spec(spec)
# Add the module to sys.modules
sys.modules[module_name] = module
# Execute the module
spec.loader.exec_module(module)
# Find the test class (usually named OpTest)
test_class = None
for attr_name in dir(module):
attr = getattr(module, attr_name)
if (
isinstance(attr, type)
and hasattr(attr, "__bases__")
and any("BaseOperatorTest" in str(base) for base in attr.__bases__)
):
test_class = attr
break
if test_class is None:
return False, f"No test class found in {test_file_path}"
# Create an instance
test_instance = test_class()
return True, test_instance
except Exception as e:
return False, f"Error importing {test_file_path}: {str(e)}"
def run_all_op_tests(
ops_dir=None,
specific_ops=None,
bench=False,
bench_mode="both",
verbose=False,
debug=False,
):
"""
Run all operator test scripts in the ops directory using direct import.
Args:
ops_dir (str, optional): Path to the ops directory. If None, uses auto-detection.
specific_ops (list, optional): List of specific operator names to test.
bench (bool): Whether benchmarking is enabled
bench_mode (str): Benchmark mode - "host", "device", or "both"
verbose (bool): Whether verbose mode is enabled
debug (bool): Print a full traceback and stop on the first exception
Returns:
tuple: (results, cumulative_timing) where results maps test names to dicts of status, timing, and captured output.
"""
if ops_dir is None:
ops_dir = find_ops_directory()
else:
ops_dir = Path(ops_dir)
if not ops_dir or not ops_dir.exists():
print(f"Error: Ops directory '{ops_dir}' does not exist.")
return {}
print(f"Looking for test files in: {ops_dir}")
# Find all Python test files
test_files = list(ops_dir.glob("*.py"))
# Filter out this script itself and non-operator test files
current_script = Path(__file__).name
test_files = [f for f in test_files if f.name != current_script]
# Filter to include only files that look like operator tests
operator_test_files = []
for test_file in test_files:
try:
with open(test_file, "r", encoding="utf-8") as f:
content = f.read()
# Look for characteristic patterns of operator tests
if "infinicore" in content and (
"BaseOperatorTest" in content or "GenericTestRunner" in content
):
operator_test_files.append(test_file)
except Exception:
continue
# Filter for specific operators if requested
if specific_ops:
filtered_files = []
for test_file in operator_test_files:
test_name = test_file.stem.lower()
if any(op.lower() == test_name for op in specific_ops):
filtered_files.append(test_file)
operator_test_files = filtered_files
if not operator_test_files:
print(f"No operator test files found in {ops_dir}")
print(f"Available Python files: {[f.name for f in test_files]}")
return {}
print(f"Found {len(operator_test_files)} operator test files:")
for test_file in operator_test_files:
print(f" - {test_file.name}")
results = {}
cumulative_timing = {
"total_torch_host_time": 0.0,
"total_torch_device_time": 0.0,
"total_infinicore_host_time": 0.0,
"total_infinicore_device_time": 0.0,
"operators_tested": 0,
}
for test_file in operator_test_files:
test_name = test_file.stem
try:
# Import and run the test directly
success, test_instance_or_error = import_operator_test(test_file)
if not success:
print(f"💥 {test_name}: ERROR - {test_instance_or_error}")
results[test_name] = {
"success": False,
"return_code": -1,
"torch_host_time": 0.0,
"torch_device_time": 0.0,
"infini_host_time": 0.0,
"infini_device_time": 0.0,
"error_message": test_instance_or_error,
"test_runner": None,
"stdout": "",
"stderr": test_instance_or_error,
}
continue
# Get the test runner class from the module
test_module = sys.modules[f"op_test_{test_file.stem}"]
if not hasattr(test_module, "GenericTestRunner"):
print(f"💥 {test_name}: ERROR - No GenericTestRunner found")
results[test_name] = {
"success": False,
"return_code": -1,
"torch_host_time": 0.0,
"torch_device_time": 0.0,
"infini_host_time": 0.0,
"infini_device_time": 0.0,
"error_message": "No GenericTestRunner found",
"test_runner": None,
"stdout": "",
"stderr": "No GenericTestRunner found",
}
continue
# Create and run the test runner
test_runner_class = test_module.GenericTestRunner
runner_instance = test_runner_class(test_instance_or_error.__class__)
# Temporarily redirect stdout to capture output
from io import StringIO
stdout_capture = StringIO()
stderr_capture = StringIO()
old_stdout = sys.stdout
old_stderr = sys.stderr
sys.stdout = stdout_capture
sys.stderr = stderr_capture
try:
# Run the test
test_success, test_runner = runner_instance.run()
# Get captured output
stdout_output = stdout_capture.getvalue()
stderr_output = stderr_capture.getvalue()
# Restore stdout/stderr
sys.stdout = old_stdout
sys.stderr = old_stderr
# Print the captured output
if stdout_output:
print(stdout_output.rstrip())
if stderr_output:
print("\nSTDERR:")
print(stderr_output.rstrip())
# Analyze test results
test_results = test_runner.get_test_results() if test_runner else []
# Determine overall test status
if test_success:
return_code = 0
status_icon = "✅"
status_text = "PASSED"
else:
# Check if there are any failed tests
has_failures = any(
result.return_code == -1 for result in test_results
)
has_partial = any(
result.return_code == -3 for result in test_results
)
has_skipped = any(
result.return_code == -2 for result in test_results
)
if has_failures:
return_code = -1
status_icon = "❌"
status_text = "FAILED"
elif has_partial:
return_code = -3
status_icon = "⚠️"
status_text = "PARTIAL"
elif has_skipped:
return_code = -2
status_icon = "⏭️"
status_text = "SKIPPED"
else:
return_code = -1
status_icon = "❌"
status_text = "FAILED"
# Calculate timing for all four metrics
torch_host_time = sum(result.torch_host_time for result in test_results)
torch_device_time = sum(
result.torch_device_time for result in test_results
)
infini_host_time = sum(
result.infini_host_time for result in test_results
)
infini_device_time = sum(
result.infini_device_time for result in test_results
)
results[test_name] = {
"success": test_success,
"return_code": return_code,
"torch_host_time": torch_host_time,
"torch_device_time": torch_device_time,
"infini_host_time": infini_host_time,
"infini_device_time": infini_device_time,
"error_message": "",
"test_runner": test_runner,
"stdout": stdout_output,
"stderr": stderr_output,
}
print(
f"{status_icon} {test_name}: {status_text} (return code: {return_code})"
)
# Extract benchmark timing if in bench mode
if bench and test_success and return_code == 0:
cumulative_timing["total_torch_host_time"] += torch_host_time
cumulative_timing["total_torch_device_time"] += torch_device_time
cumulative_timing["total_infinicore_host_time"] += infini_host_time
cumulative_timing[
"total_infinicore_device_time"
] += infini_device_time
cumulative_timing["operators_tested"] += 1
except Exception as e:
# Restore stdout/stderr in case of exception
sys.stdout = old_stdout
sys.stderr = old_stderr
raise
# In verbose mode, stop execution on first failure
if verbose and not test_success and return_code != 0:
break
except Exception as e:
print(f"💥 {test_name}: ERROR - {str(e)}")
results[test_name] = {
"success": False,
"return_code": -1,
"torch_host_time": 0.0,
"torch_device_time": 0.0,
"infini_host_time": 0.0,
"infini_device_time": 0.0,
"error_message": str(e),
"test_runner": None,
"stdout": "",
"stderr": str(e),
}
# In verbose mode, stop execution on any exception
if verbose:
print(f"\n{'!'*60}")
print(
f"VERBOSE MODE: Stopping execution due to exception in {test_name}"
)
print(f"{'!'*60}")
break
if debug:
traceback.print_exc()
break
return results, cumulative_timing
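# Shape of the returned pair (illustrative): results maps each operator name
# to a flat dict of status, timing, and captured output, while
# cumulative_timing only totals operators that passed with bench enabled.
#
#     results["add"]["return_code"]            # 0 / -1 / -2 / -3
#     cumulative_timing["operators_tested"]    # count of benchmarked passes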
def print_summary(
results,
verbose=False,
total_expected_tests=0,
cumulative_timing=None,
bench_mode="both",
):
"""Print a comprehensive summary of test results including benchmark data."""
print(f"\n{'='*80}")
print("CUMULATIVE TEST SUMMARY")
print(f"{'='*80}")
if not results:
print("No tests were run.")
return False
# Count different types of results
passed = 0
failed = 0
skipped = 0
partial = 0
passed_operators = [] # Store passed operator names
failed_operators = [] # Store failed operator names
skipped_operators = [] # Store skipped operator names
partial_operators = [] # Store partial operator names
for test_name, result_data in results.items():
return_code = result_data["return_code"]
if return_code == 0:
passed += 1
passed_operators.append(test_name)
elif return_code == -2: # Special code for skipped tests
skipped += 1
skipped_operators.append(test_name)
elif return_code == -3: # Special code for partial tests
partial += 1
partial_operators.append(test_name)
else:
failed += 1
failed_operators.append(test_name)
total = len(results)
print(f"Total tests run: {total}")
if total_expected_tests > 0 and total < total_expected_tests:
print(f"Total tests expected: {total_expected_tests}")
print(f"Tests not executed: {total_expected_tests - total}")
print(f"Passed: {passed}")
print(f"Failed: {failed}")
if skipped > 0:
print(f"Skipped: {skipped}")
if partial > 0:
print(f"Partial: {partial}")
# Print benchmark summary if cumulative_timing data is available
if cumulative_timing and cumulative_timing["operators_tested"] > 0:
print(f"{'-'*40}")
print("BENCHMARK SUMMARY:")
print(f" Operators Tested: {cumulative_timing['operators_tested']}")
# Display timing based on bench_mode
if bench_mode in ["host", "both"]:
print(
f" PyTorch Host Total Time: {cumulative_timing['total_torch_host_time']:12.3f} ms"
)
print(
f" InfiniCore Host Total Time: {cumulative_timing['total_infinicore_host_time']:12.3f} ms"
)
if bench_mode in ["device", "both"]:
print(
f" PyTorch Device Total Time: {cumulative_timing['total_torch_device_time']:12.3f} ms"
)
print(
f" InfiniCore Device Total Time: {cumulative_timing['total_infinicore_device_time']:12.3f} ms"
)
print(f"{'-'*40}")
# Display passed operators
if passed_operators:
print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):")
# Display operators in groups of 10 per line
for i in range(0, len(passed_operators), 10):
line_ops = passed_operators[i : i + 10]
print(" " + ", ".join(line_ops))
else:
print(f"\n✅ PASSED OPERATORS: None")
# Display failed operators (if any)
if failed_operators:
print(f"\n❌ FAILED OPERATORS ({len(failed_operators)}):")
for i in range(0, len(failed_operators), 10):
line_ops = failed_operators[i : i + 10]
print(" " + ", ".join(line_ops))
# Display skipped operators (if any)
if skipped_operators:
print(f"\n⏭️ SKIPPED OPERATORS ({len(skipped_operators)}):")
for i in range(0, len(skipped_operators), 10):
line_ops = skipped_operators[i : i + 10]
print(" " + ", ".join(line_ops))
# Display partial operators (if any)
if partial_operators:
print(f"\n⚠️ PARTIAL OPERATORS ({len(partial_operators)}):")
for i in range(0, len(partial_operators), 10):
line_ops = partial_operators[i : i + 10]
print(" " + ", ".join(line_ops))
if total > 0:
# Calculate success rate based on actual executed tests
executed_tests = passed + failed + partial
if executed_tests > 0:
success_rate = passed / executed_tests * 100
print(f"\nSuccess rate: {success_rate:.1f}%")
if verbose and total < total_expected_tests:
print(f"\n💡 Verbose mode: Execution stopped after first failure")
print(f" {total_expected_tests - total} tests were not executed")
if failed == 0:
if skipped > 0 or partial > 0:
print(f"\n⚠️ Tests completed with some operators not implemented")
print(f" - {skipped} tests skipped (both operators not implemented)")
print(f" - {partial} tests partial (one operator not implemented)")
else:
print(f"\n🎉 All tests passed!")
return True
else:
print(f"\n{failed} tests failed")
return False
def list_available_tests(ops_dir=None):
"""List all available operator test files."""
if ops_dir is None:
ops_dir = find_ops_directory()
else:
ops_dir = Path(ops_dir)
if not ops_dir or not ops_dir.exists():
print(f"Error: Ops directory '{ops_dir}' does not exist.")
return
operators = get_available_operators(ops_dir)
if operators:
print(f"Available operator test files in {ops_dir}:")
for operator in operators:
print(f" - {operator}")
print(f"\nTotal: {len(operators)} operators")
else:
print(f"No operator test files found in {ops_dir}")
# Show available Python files for debugging
test_files = list(ops_dir.glob("*.py"))
current_script = Path(__file__).name
test_files = [f for f in test_files if f.name != current_script]
if test_files:
print(f"Available Python files: {[f.name for f in test_files]}")
def generate_help_epilog(ops_dir):
"""
Generate dynamic help epilog with available operators and hardware platforms.
Args:
ops_dir: Path to ops directory
Returns:
str: Formatted help text
"""
# Get available operators
operators = get_available_operators(ops_dir)
# Build epilog text
epilog_parts = []
# Examples section
epilog_parts.append("Examples:")
epilog_parts.append(" # Run all operator tests on CPU")
epilog_parts.append(" python run.py --cpu")
epilog_parts.append("")
epilog_parts.append(" # Run specific operators")
epilog_parts.append(" python run.py --ops add matmul --nvidia")
epilog_parts.append("")
epilog_parts.append(" # Run with debug mode on multiple devices")
epilog_parts.append(" python run.py --cpu --nvidia --debug")
epilog_parts.append("")
epilog_parts.append(
" # Run with verbose mode to stop on first error with full traceback"
)
epilog_parts.append(" python run.py --cpu --nvidia --verbose")
epilog_parts.append("")
epilog_parts.append(" # Run with benchmarking (both host and device timing)")
epilog_parts.append(" python run.py --cpu --bench")
epilog_parts.append("")
epilog_parts.append(" # Run with host timing only")
epilog_parts.append(" python run.py --nvidia --bench host")
epilog_parts.append("")
epilog_parts.append(" # Run with device timing only")
epilog_parts.append(" python run.py --nvidia --bench device")
epilog_parts.append("")
epilog_parts.append(" # List available tests without running")
epilog_parts.append(" python run.py --list")
epilog_parts.append("")
# Available operators section
if operators:
epilog_parts.append("Available Operators:")
# Group operators for better display
operators_per_line = 4
for i in range(0, len(operators), operators_per_line):
line_ops = operators[i : i + operators_per_line]
epilog_parts.append(f" {', '.join(line_ops)}")
epilog_parts.append("")
else:
epilog_parts.append("Available Operators: (none detected)")
epilog_parts.append("")
# Additional notes
epilog_parts.append("Note:")
epilog_parts.append(
" - Use '--' to pass additional arguments to individual test scripts"
)
epilog_parts.append(
" - Operators are automatically discovered from the ops directory"
)
epilog_parts.append(
" - --bench mode now shows cumulative timing across all operators"
)
epilog_parts.append(
" - --bench host/device/both controls host/device timing measurement"
)
epilog_parts.append(
" - --verbose mode stops execution on first error and shows full traceback"
)
return "\n".join(epilog_parts)
def main():
"""Main entry point with comprehensive command line argument parsing."""
# First, find ops directory for dynamic help generation
ops_dir = find_ops_directory()
parser = argparse.ArgumentParser(
description="Run InfiniCore operator tests across multiple hardware platforms",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=generate_help_epilog(ops_dir),
)
# Core options
parser.add_argument(
"--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)"
)
parser.add_argument(
"--ops", nargs="+", help="Run specific operators only (e.g., --ops add matmul)"
)
parser.add_argument(
"--list",
action="store_true",
help="List all available test files without running them",
)
"""Main entry point for the InfiniCore Operator Test Runner."""
parser = argparse.ArgumentParser(description="Run InfiniCore operator tests across multiple hardware platforms")
parser.add_argument("--ops-dir", type=str, help="Path to the ops directory (default: auto-detect)")
parser.add_argument("--ops", nargs="+", help="Run specific operators only (e.g., --ops add matmul)")
parser.add_argument("--list", action="store_true", help="List all available test files without running them")
# Add common test arguments shared with individual test scripts (--bench, --debug, --verbose, --save, ...)
add_common_test_args(parser)
get_hardware_args_group(parser)
args, _ = parser.parse_known_args()
# 1. Discovery
discoverer = TestDiscoverer(args.ops_dir)
if args.list:
print("Available operators:", discoverer.get_available_operators())
return
test_files = discoverer.scan(args.ops)
if not test_files:
print("No tests found.")
sys.exit(0)
# 2. Preparation
executor = SingleTestExecutor()
cumulative_timing = TestTiming()
results = []
TestReporter.print_header(discoverer.ops_dir, len(test_files))
# 3. Execution Loop
for f in test_files:
result = executor.run(f)
results.append(result)
# Real-time reporting and printing of stdout
TestReporter.print_live_result(result, verbose=args.verbose)
# Accumulate timing
if result.success:
cumulative_timing.torch_host += result.timing.torch_host
cumulative_timing.infini_host += result.timing.infini_host
cumulative_timing.torch_device += result.timing.torch_device
cumulative_timing.infini_device += result.timing.infini_device
# Fail fast in verbose mode
if args.verbose and not result.success:
print("\nStopping due to failure in verbose mode.")
break
# 4. Final Report & Save
all_passed = TestReporter.print_summary(
results,
cumulative_timing if args.bench else None,
total_expected=len(test_files)
)
sys.exit(0 if all_passed else 1)
if __name__ == "__main__":
main()