Unverified commit edc36489, authored by Yanan Cao, committed by GitHub
Browse files

[Kernel][Helion] Fix inductor fusion of Helion HOP (#39944)


Signed-off-by: Yanan Cao <gmagogsfm@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 9965f501
...@@ -769,7 +769,7 @@ steps: ...@@ -769,7 +769,7 @@ steps:
- tests/kernels/helion/ - tests/kernels/helion/
- vllm/platforms/rocm.py - vllm/platforms/rocm.py
commands: commands:
- pip install helion==0.3.3 - pip install helion==1.0.0
- pytest -v -s kernels/helion/ - pytest -v -s kernels/helion/
......
...@@ -155,7 +155,7 @@ steps: ...@@ -155,7 +155,7 @@ steps:
- vllm/utils/import_utils.py - vllm/utils/import_utils.py
- tests/kernels/helion/ - tests/kernels/helion/
commands: commands:
- pip install helion==0.3.3 - pip install helion==1.0.0
- pytest -v -s kernels/helion/ - pytest -v -s kernels/helion/
......
...@@ -1104,7 +1104,7 @@ setup( ...@@ -1104,7 +1104,7 @@ setup(
# NOTE: When updating helion version, also update CI files: # NOTE: When updating helion version, also update CI files:
# - .buildkite/test_areas/kernels.yaml # - .buildkite/test_areas/kernels.yaml
# - .buildkite/test-amd.yaml # - .buildkite/test-amd.yaml
"helion": ["helion==0.3.3"], "helion": ["helion==1.0.0"],
# Optional deps for gRPC server (vllm serve --grpc) # Optional deps for gRPC server (vllm serve --grpc)
"grpc": ["smg-grpc-servicer[vllm] >= 0.5.0"], "grpc": ["smg-grpc-servicer[vllm] >= 0.5.0"],
# Optional deps for OpenTelemetry tracing # Optional deps for OpenTelemetry tracing
......
...@@ -36,9 +36,11 @@ from vllm.kernels.helion.register import ( ...@@ -36,9 +36,11 @@ from vllm.kernels.helion.register import (
) )
if _HOP_AVAILABLE: if _HOP_AVAILABLE:
from helion._compat import supports_torch_compile_fusion
from helion._compiler._dynamo.higher_order_ops import ( from helion._compiler._dynamo.higher_order_ops import (
helion_kernel_wrapper_mutation, helion_kernel_wrapper_mutation,
) )
from torch._inductor.utils import run_and_get_code
def _add_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: def _add_kernel(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
...@@ -1003,3 +1005,49 @@ class TestTorchCompileHOP: ...@@ -1003,3 +1005,49 @@ class TestTorchCompileHOP:
"Compiled execution result doesn't match eager execution. " "Compiled execution result doesn't match eager execution. "
f"Max difference: {torch.max(torch.abs(compiled_result - eager_result))}" f"Max difference: {torch.max(torch.abs(compiled_result - eager_result))}"
) )
# NOTE(review): indentation was lost in this rendering of the diff, so the
# nesting of the statements below (in particular, what lives inside the
# `with` block) is not visible here -- confirm against the real file.
#
# Skip unless the HOP path is available AND Helion reports torch.compile
# fusion support. The `and` short-circuits, so
# supports_torch_compile_fusion() is only called when _HOP_AVAILABLE is true.
@pytest.mark.skipif(
not (_HOP_AVAILABLE and supports_torch_compile_fusion()),
reason="Requires PyTorch with Helion inductor fusion support",
)
def test_inductor_backend_compiles_helion_hop(self):
"""Test torch.compile with inductor backend and Helion fusion enabled."""
# A single named config; the config_picker below always selects it.
configs = {"default": helion.Config(block_sizes=[4, 4])}
with dummy_kernel_registry(configs=configs) as register:
# Register _add_kernel under a test-only op name with fusion turned on
# in the Helion settings.
add_helion_kernel = register(
op_name="test_inductor_add_kernel",
config_picker=lambda args, keys: "default",
helion_settings=helion.Settings(
torch_compile_fusion=True, static_shapes=False
),
)(_add_kernel)
# Function under test: elementwise ops before (x * 2.0, y + 1.0) and after
# (relu) the Helion kernel call, i.e. the prologue/epilogue ops that the
# assertion at the end expects to be fused.
def f(x, y):
x = x * 2.0
y = y + 1.0
out = add_helion_kernel(x, y)
return out.relu()
# Reset Dynamo state before compiling f for this test.
torch._dynamo.reset()
# fullgraph=True: the whole of f, including the Helion kernel call, must
# compile as one graph.
compiled_f = torch.compile(f, backend="inductor", fullgraph=True)
x = torch.randn(4, 4, device="cuda")
y = torch.randn(4, 4, device="cuda")
# Run the compiled function and capture the source code inductor generated.
compiled_result, source_codes = run_and_get_code(compiled_f, x, y)
# Eager reference computed on the same inputs.
eager_result = f(x, y)
assert torch.allclose(compiled_result, eager_result, atol=1e-5, rtol=1e-5), (
"Inductor-compiled result doesn't match eager execution. "
f"Max difference: {torch.max(torch.abs(compiled_result - eager_result))}"
)
# With fusion enabled, prologue/epilogue ops should be fused into
# a single triton kernel rather than generating separate kernels.
# The check counts occurrences of the "@triton.jit" decorator text across
# all captured source strings.
kernel_count = sum(code.count("@triton.jit") for code in source_codes)
assert kernel_count == 1, (
f"Expected 1 fused triton kernel, got {kernel_count}. "
"Prologue/epilogue ops were not fused into the Helion kernel."
)
...@@ -63,8 +63,10 @@ from helion.runtime.settings import default_autotuner_fn ...@@ -63,8 +63,10 @@ from helion.runtime.settings import default_autotuner_fn
_HOP_AVAILABLE = requires_torch_version("2.11") _HOP_AVAILABLE = requires_torch_version("2.11")
if _HOP_AVAILABLE: if _HOP_AVAILABLE:
from helion._compat import supports_torch_compile_fusion
from helion._compiler._dynamo.higher_order_ops import helion_kernel_side_table from helion._compiler._dynamo.higher_order_ops import helion_kernel_side_table
from helion._compiler._dynamo.variables import HelionKernelVariable from helion._compiler._dynamo.variables import HelionKernelVariable
from helion.runtime.kernel import Kernel
from torch._dynamo.guards import GuardBuilder from torch._dynamo.guards import GuardBuilder
from torch._dynamo.variables.builder import VariableBuilder from torch._dynamo.variables.builder import VariableBuilder
...@@ -475,19 +477,22 @@ if _HOP_AVAILABLE: ...@@ -475,19 +477,22 @@ if _HOP_AVAILABLE:
"""Register HelionKernelWrapper with Dynamo's VariableBuilder. """Register HelionKernelWrapper with Dynamo's VariableBuilder.
When Dynamo encounters a HelionKernelWrapper during tracing, this When Dynamo encounters a HelionKernelWrapper during tracing, this
extracts the underlying Helion Kernel, registers it in the side table, extracts the underlying Helion Kernel and delegates to Helion's own
and returns Helion's own HelionKernelVariable to handle HOP emission. registered Kernel handler, which handles HOP emission, side table
registration, and inductor lowering setup.
""" """
def wrap_helion_kernel_wrapper( def wrap_helion_kernel_wrapper(
builder: VariableBuilder, value: HelionKernelWrapper builder: VariableBuilder, value: HelionKernelWrapper
): ):
kernel = value.get_configured_op()._decorated_kernel kernel = value.get_configured_op()._decorated_kernel
if supports_torch_compile_fusion():
helion_handler = VariableBuilder._type_dispatch()[Kernel]
return helion_handler(builder, kernel)
kernel_idx = helion_kernel_side_table.add_kernel(kernel) kernel_idx = helion_kernel_side_table.add_kernel(kernel)
builder.install_guards(GuardBuilder.ID_MATCH) builder.install_guards(GuardBuilder.ID_MATCH)
return HelionKernelVariable(kernel, kernel_idx, source=builder.source) return HelionKernelVariable(kernel, kernel_idx, source=builder.source)
# Register with Dynamo's type dispatch system
dispatch = VariableBuilder._type_dispatch() dispatch = VariableBuilder._type_dispatch()
dispatch[HelionKernelWrapper] = wrap_helion_kernel_wrapper dispatch[HelionKernelWrapper] = wrap_helion_kernel_wrapper
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment