Commit 5c88cbbd authored by wooway777's avatar wooway777
Browse files

issue/594 - accumulate total time in tests

parent d2afb65b
import torch import torch
import infinicore import infinicore
import traceback # Add import for traceback import traceback
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
...@@ -12,8 +12,6 @@ from .utils import ( ...@@ -12,8 +12,6 @@ from .utils import (
create_test_comparator, create_test_comparator,
infinicore_tensor_from_torch, infinicore_tensor_from_torch,
profile_operation, profile_operation,
synchronize_device,
convert_infinicore_to_torch,
) )
...@@ -244,6 +242,12 @@ class TestRunner: ...@@ -244,6 +242,12 @@ class TestRunner:
self.passed_tests = ( self.passed_tests = (
[] []
) # Track passed tests (both operators implemented and passed) ) # Track passed tests (both operators implemented and passed)
# Add benchmark timing statistics
self.benchmark_times = {
"torch_total": 0.0,
"infinicore_total": 0.0,
"per_test_case": {}, # Store timing per test case
}
def run_tests(self, devices, test_func, test_type="Test"): def run_tests(self, devices, test_func, test_type="Test"):
""" """
...@@ -344,9 +348,35 @@ class TestRunner: ...@@ -344,9 +348,35 @@ class TestRunner:
else: else:
print(f"\n\033[92mAll tests passed!\033[0m") print(f"\n\033[92mAll tests passed!\033[0m")
# Print benchmark summary if benchmarking was enabled
if self.config.bench and (
self.benchmark_times["torch_total"] > 0
or self.benchmark_times["infinicore_total"] > 0
):
self._print_benchmark_summary()
print(f"{'='*60}") print(f"{'='*60}")
return result return result
def _print_benchmark_summary(self):
    """Print the accumulated benchmark timing summary.

    Reads the totals accumulated in ``self.benchmark_times`` (stored in
    seconds) and reports them in milliseconds. When both the PyTorch and
    InfiniCore totals are available, also reports the speedup ratio
    (PyTorch time divided by InfiniCore time).
    """
    print(f"{'-'*60}")
    print("BENCHMARK SUMMARY")

    torch_total = self.benchmark_times["torch_total"]
    infinicore_total = self.benchmark_times["infinicore_total"]

    # Totals are accumulated in seconds; display in milliseconds.
    if torch_total > 0:
        print(f"PyTorch Total Time: {torch_total * 1000:.3f} ms")
    if infinicore_total > 0:
        print(f"InfiniCore Total Time: {infinicore_total * 1000:.3f} ms")

    if torch_total > 0 and infinicore_total > 0:
        # The guard above guarantees infinicore_total > 0, so the
        # division is safe; the original inner
        # `if infinicore_total > 0 else float("inf")` was dead code.
        speedup = torch_total / infinicore_total
        print(f"Speedup (PyTorch/InfiniCore): {speedup:.2f}x")
class BaseOperatorTest(ABC): class BaseOperatorTest(ABC):
"""Base operator test""" """Base operator test"""
...@@ -711,8 +741,13 @@ class BaseOperatorTest(ABC): ...@@ -711,8 +741,13 @@ class BaseOperatorTest(ABC):
comparison_target, comparison_target,
): ):
""" """
Unified benchmarking logic Unified benchmarking logic with timing accumulation
""" """
# Initialize timing variables
torch_time = 0.0
infini_time = 0.0
if torch_implemented: if torch_implemented:
if output_count > 1: if output_count > 1:
# For multiple outputs, just call the operator # For multiple outputs, just call the operator
...@@ -735,12 +770,13 @@ class BaseOperatorTest(ABC): ...@@ -735,12 +770,13 @@ class BaseOperatorTest(ABC):
else inputs[comparison_target] else inputs[comparison_target]
) )
profile_operation( torch_time = profile_operation(
"PyTorch ", "PyTorch ",
torch_op, torch_op,
device_str, device_str,
config.num_prerun, config.num_prerun,
config.num_iterations, config.num_iterations,
total=True,
) )
if infini_implemented: if infini_implemented:
...@@ -759,10 +795,17 @@ class BaseOperatorTest(ABC): ...@@ -759,10 +795,17 @@ class BaseOperatorTest(ABC):
else infini_inputs[comparison_target] else infini_inputs[comparison_target]
) )
profile_operation( infini_time = profile_operation(
"InfiniCore", "InfiniCore",
infini_op, infini_op,
device_str, device_str,
config.num_prerun, config.num_prerun,
config.num_iterations, config.num_iterations,
total=True,
) )
# Store timing information in the test runner
if hasattr(config, "_test_runner") and config._test_runner:
# Accumulate total times
config._test_runner.benchmark_times["torch_total"] += torch_time
config._test_runner.benchmark_times["infinicore_total"] += infini_time
...@@ -32,6 +32,10 @@ class GenericTestRunner: ...@@ -32,6 +32,10 @@ class GenericTestRunner:
) )
runner = TestRunner(self.operator_test.test_cases, config) runner = TestRunner(self.operator_test.test_cases, config)
# Pass the test runner instance to config for benchmark timing accumulation
config._test_runner = runner
devices = get_test_devices(self.args) devices = get_test_devices(self.args)
# Run unified tests - returns True if no tests failed # Run unified tests - returns True if no tests failed
......
...@@ -22,10 +22,12 @@ def timed_op(func, num_iterations, device): ...@@ -22,10 +22,12 @@ def timed_op(func, num_iterations, device):
for _ in range(num_iterations): for _ in range(num_iterations):
func() func()
synchronize_device(device) synchronize_device(device)
return (time.time() - start) / num_iterations return time.time() - start
def profile_operation(desc, func, torch_device, num_prerun, num_iterations): def profile_operation(
desc, func, torch_device, num_prerun, num_iterations, total=False
):
""" """
Performance profiling workflow Performance profiling workflow
""" """
...@@ -35,7 +37,11 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations): ...@@ -35,7 +37,11 @@ def profile_operation(desc, func, torch_device, num_prerun, num_iterations):
# Timed execution # Timed execution
elapsed = timed_op(lambda: func(), num_iterations, torch_device) elapsed = timed_op(lambda: func(), num_iterations, torch_device)
print(f" {desc} time: {elapsed * 1000 :6f} ms") print(f" {desc} time: {elapsed / num_iterations * 1000 :6f} ms")
if total:
return elapsed
else:
return elapsed / num_iterations
def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True): def debug(actual, desired, atol=0, rtol=1e-2, equal_nan=False, verbose=True):
......
...@@ -125,6 +125,14 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): ...@@ -125,6 +125,14 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
# Check if verbose mode is enabled # Check if verbose mode is enabled
verbose_mode = extra_args and "--verbose" in extra_args verbose_mode = extra_args and "--verbose" in extra_args
# Check if bench mode is enabled for cumulative timing
bench_mode = extra_args and "--bench" in extra_args
cumulative_timing = {
"total_torch_time": 0.0,
"total_infinicore_time": 0.0,
"operators_tested": 0,
}
for test_file in operator_test_files: for test_file in operator_test_files:
test_name = test_file.stem test_name = test_file.stem
...@@ -157,7 +165,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): ...@@ -157,7 +165,7 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
# Both operators not implemented - skipped test # Both operators not implemented - skipped test
success = False # Not a failure, but skipped success = False # Not a failure, but skipped
returncode = -2 # Special code for skipped returncode = -2 # Special code for skipped
elif "one operator not implemented" in stdout_lower: elif "operator not implemented" in stdout_lower:
# One operator not implemented - partial test # One operator not implemented - partial test
success = False # Not fully successful success = False # Not fully successful
returncode = -3 # Special code for partial returncode = -3 # Special code for partial
...@@ -202,6 +210,34 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): ...@@ -202,6 +210,34 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
f"{status_icon} {test_name}: {status_text} (return code: {returncode})" f"{status_icon} {test_name}: {status_text} (return code: {returncode})"
) )
# Extract benchmark timing if in bench mode
if bench_mode and success:
# Look for benchmark summary in stdout
lines = result.stdout.split("\n")
torch_time = 0.0
infini_time = 0.0
for line in lines:
if "PyTorch Total Time:" in line:
try:
# Extract time value (e.g., "PyTorch Total Time: 123.456 ms")
torch_time = (
float(line.split(":")[1].strip().split()[0]) / 1000.0
) # Convert to seconds
except:
pass
elif "InfiniCore Total Time:" in line:
try:
infini_time = (
float(line.split(":")[1].strip().split()[0]) / 1000.0
) # Convert to seconds
except:
pass
cumulative_timing["total_torch_time"] += torch_time
cumulative_timing["total_infinicore_time"] += infini_time
cumulative_timing["operators_tested"] += 1
# In verbose mode, stop execution on first failure # In verbose mode, stop execution on first failure
if verbose_mode and not success and returncode not in [-2, -3]: if verbose_mode and not success and returncode not in [-2, -3]:
break break
...@@ -219,11 +255,13 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None): ...@@ -219,11 +255,13 @@ def run_all_op_tests(ops_dir=None, specific_ops=None, extra_args=None):
print(f"{'!'*60}") print(f"{'!'*60}")
break break
return results return results, cumulative_timing
def print_summary(results, verbose_mode=False, total_expected_tests=0): def print_summary(
"""Print a comprehensive summary of test results.""" results, verbose_mode=False, total_expected_tests=0, cumulative_timing=None
):
"""Print a comprehensive summary of test results including benchmark data."""
print(f"\n{'='*80}") print(f"\n{'='*80}")
print("CUMULATIVE TEST SUMMARY") print("CUMULATIVE TEST SUMMARY")
print(f"{'='*80}") print(f"{'='*80}")
...@@ -272,6 +310,19 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0): ...@@ -272,6 +310,19 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
if partial > 0: if partial > 0:
print(f"Partial: {partial}") print(f"Partial: {partial}")
# Print benchmark summary if cumulative_timing data is available
if cumulative_timing and cumulative_timing["operators_tested"] > 0:
print(f"{'-'*40}")
print("BENCHMARK SUMMARY:")
print(f" Operators Tested: {cumulative_timing['operators_tested']}")
print(
f" Total PyTorch Time: {cumulative_timing['total_torch_time'] * 1000:.3f} ms"
)
print(
f" Total InfiniCore Time: {cumulative_timing['total_infinicore_time'] * 1000:.3f} ms"
)
print(f"{'-'*40}")
# Display passed operators # Display passed operators
if passed_operators: if passed_operators:
print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):") print(f"\n✅ PASSED OPERATORS ({len(passed_operators)}):")
...@@ -304,7 +355,7 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0): ...@@ -304,7 +355,7 @@ def print_summary(results, verbose_mode=False, total_expected_tests=0):
print(" " + ", ".join(line_ops)) print(" " + ", ".join(line_ops))
if total > 0: if total > 0:
# Calculate success rate based on executed tests only # Calculate success rate based on actual executed tests
executed_tests = passed + failed + partial executed_tests = passed + failed + partial
if executed_tests > 0: if executed_tests > 0:
success_rate = passed / executed_tests * 100 success_rate = passed / executed_tests * 100
...@@ -387,6 +438,9 @@ def generate_help_epilog(ops_dir): ...@@ -387,6 +438,9 @@ def generate_help_epilog(ops_dir):
) )
epilog_parts.append(" python run.py --cpu --nvidia --verbose") epilog_parts.append(" python run.py --cpu --nvidia --verbose")
epilog_parts.append("") epilog_parts.append("")
epilog_parts.append(" # Run with benchmarking to get cumulative timing")
epilog_parts.append(" python run.py --cpu --bench")
epilog_parts.append("")
epilog_parts.append(" # List available tests without running") epilog_parts.append(" # List available tests without running")
epilog_parts.append(" python run.py --list") epilog_parts.append(" python run.py --list")
epilog_parts.append("") epilog_parts.append("")
...@@ -413,7 +467,7 @@ def generate_help_epilog(ops_dir): ...@@ -413,7 +467,7 @@ def generate_help_epilog(ops_dir):
" - Operators are automatically discovered from the ops directory" " - Operators are automatically discovered from the ops directory"
) )
epilog_parts.append( epilog_parts.append(
" - --bench option is disabled in batch mode (run individual tests for benchmarking)" " - --bench mode now shows cumulative timing across all operators"
) )
epilog_parts.append( epilog_parts.append(
" - --verbose mode stops execution on first error and shows full traceback" " - --verbose mode stops execution on first error and shows full traceback"
...@@ -527,14 +581,16 @@ def main(): ...@@ -527,14 +581,16 @@ def main():
print() print()
# Run all tests # Run all tests
results = run_all_op_tests( results, cumulative_timing = run_all_op_tests(
ops_dir=ops_dir, ops_dir=ops_dir,
specific_ops=args.ops, specific_ops=args.ops,
extra_args=unknown_args, extra_args=unknown_args,
) )
# Print summary and exit with appropriate code # Print summary and exit with appropriate code
all_passed = print_summary(results, args.verbose, total_expected_tests) all_passed = print_summary(
results, args.verbose, total_expected_tests, cumulative_timing
)
# Check if there were any tests with missing implementations # Check if there were any tests with missing implementations
has_missing_implementations = any( has_missing_implementations = any(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment