"vscode:/vscode.git/clone" did not exist on "9129c728d9c969e5f7d35d258c22a43e16bd24f2"
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Test make_fx tracing and inductor pattern matching with HelionKernelWrapper."""

import contextlib
from unittest.mock import Mock, patch

import pytest
import torch

from vllm.utils.import_utils import has_helion

if not has_helion():
    pytest.skip(
        "Helion is not installed. Install with: pip install vllm[helion]",
        allow_module_level=True,
    )

import helion
import helion.language as hl
from helion._compat import requires_torch_version

if not requires_torch_version("2.11"):
    pytest.skip(
        "HigherOrderOp requires PyTorch >= 2.11",
        allow_module_level=True,
    )

from helion._compiler._dynamo.higher_order_ops import (
    helion_kernel_side_table,
    helion_kernel_wrapper_mutation,
)
from torch._inductor.pattern_matcher import (
    PatternMatcherPass,
    fwd_only,
    register_replacement,
    select_decomp_table,
)
from torch.fx.experimental.proxy_tensor import make_fx

from vllm.kernels.helion.config_manager import ConfigManager
from vllm.kernels.helion.register import HelionKernelWrapper


@contextlib.contextmanager
def _helion_mock_context():
    """Patch Helion config lookup and GPU-name detection for the enclosed block.

    While the context is active:

    * ``vllm.kernels.helion.config_manager.ConfigManager`` is patched so that
      calling it returns a ``Mock(spec=ConfigManager)`` whose
      ``get_platform_configs`` returns a single ``"default"`` Helion config.
    * ``vllm.kernels.helion.utils.get_canonical_gpu_name`` is patched to
      return ``"nvidia_h200"``.

    Yields:
        None. Both patches are reverted when the ``with`` block exits.
    """
    configs = {
        "default": helion.Config(block_sizes=[64], num_warps=2, num_stages=2),
    }
    mock_config_manager = Mock(spec=ConfigManager)
    mock_config_manager.get_platform_configs = Mock(return_value=configs)

    with (
        patch(
            "vllm.kernels.helion.config_manager.ConfigManager",
            return_value=mock_config_manager,
        ),
        patch(
            "vllm.kernels.helion.utils.get_canonical_gpu_name",
            return_value="nvidia_h200",
        ),
    ):
        yield


class TestMakeFxHop:
    """Check that HelionKernelWrapper calls trace to the Helion higher-order op.

    Covers direct ``make_fx`` tracing of a wrapper call and insertion of the
    wrapper via an inductor ``PatternMatcherPass`` replacement.
    """

    @staticmethod
    def _hop_nodes(graph):
        """Return the ``helion_kernel_wrapper_mutation`` call nodes in *graph*."""
        return [
            n
            for n in graph.nodes
            if n.op == "call_function" and n.target is helion_kernel_wrapper_mutation
        ]

    def setup_method(self):
        """Reset the Helion kernel side table before each test method."""
        helion_kernel_side_table.reset_table()

    def test_make_fx_symbolic(self):
        """Trace a wrapper call symbolically and inspect the resulting HOP node."""

        def raw_add_scale(
            x: torch.Tensor, y: torch.Tensor, scale: float
        ) -> tuple[torch.Tensor, int, torch.Tensor]:
            out_x = torch.empty_like(x)
            out_y = torch.empty_like(x)
            for tile in hl.tile(x.size()):
                out_x[tile] = x[tile] + y[tile] * scale
                out_y[tile] = out_x[tile] * 2.0
            return out_x, 42, out_y

        input_x = torch.randn(7, 13)
        input_y = torch.randn(7, 13)
        scale = 0.5

        with _helion_mock_context():
            wrapper = HelionKernelWrapper(
                raw_kernel_func=raw_add_scale,
                op_name="test_make_fx",
                fake_impl=lambda *a, **kw: None,
                config_picker=lambda args, keys: "default",
            )

            def fn(x, y):
                # ``scale`` is captured from the enclosing scope rather than
                # passed as a traced input.
                return wrapper(x, y, scale)

            gm = make_fx(fn, tracing_mode="symbolic")(input_x, input_y)

        hop_nodes = self._hop_nodes(gm.graph)
        assert len(hop_nodes) == 1
        node = hop_nodes[0]

        assert node.kwargs["constant_args"]["scale"] == scale
        assert set(node.kwargs["tensor_args"]) == {"x", "y"}

        specs = node.kwargs["output_spec"]["leaf_specs"]
        tensor_specs = [s for s in specs if s["type"] == "tensor"]
        scalar_specs = [s for s in specs if s["type"] == "scalar"]
        assert len(tensor_specs) == 2
        assert len(scalar_specs) == 1

        for spec in tensor_specs:
            assert spec["dtype"] == input_x.dtype

        assert scalar_specs[0]["scalar_value"] == 42

        for val in node.meta["val"]:
            assert all(isinstance(s, torch.SymInt) for s in val.shape)

        # Both out_x and out_y are empty_like(x), so output shapes == input shape
        input_node = next(n for n in gm.graph.nodes if n.op == "placeholder")
        input_shape = input_node.meta["val"].shape
        for val in node.meta["val"]:
            assert len(val.shape) == len(input_shape)
            for out_s, in_s in zip(val.shape, input_shape):
                assert out_s == in_s

    def test_pattern_matcher_replaces_with_helion_hop(self):
        """Replace a ``silu(x) * y`` pattern with the wrapper and check the HOP."""

        def raw_silu_mul(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
            M, N = x.size()
            out = torch.empty_like(x)
            for tile_m, tile_n in hl.tile([M, N]):
                out[tile_m, tile_n] = (
                    torch.nn.functional.silu(x[tile_m, tile_n]) * y[tile_m, tile_n]
                )
            return out

        with _helion_mock_context():
            wrapper = HelionKernelWrapper(
                raw_kernel_func=raw_silu_mul,
                op_name="test_pm_silu_mul",
                fake_impl=lambda *a, **kw: None,
                config_picker=lambda args, keys: "default",
            )

            def pattern(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
                return torch.nn.functional.silu(x) * y

            def replacement(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
                return wrapper(x, y)

            inputs = [torch.randn(8, 16), torch.randn(8, 16)]

            pm_pass = PatternMatcherPass(pass_name="test_helion_replacement")
            register_replacement(pattern, replacement, inputs, fwd_only, pm_pass)

            def model(x, y):
                return torch.nn.functional.silu(x) * y

            decompositions = select_decomp_table()
            input_x = torch.randn(8, 16)
            input_y = torch.randn(8, 16)
            gm = make_fx(model, decompositions, tracing_mode="symbolic")(
                input_x, input_y
            )

            # Before the pass runs, the traced model contains no Helion HOP.
            assert len(self._hop_nodes(gm.graph)) == 0

            match_count = pm_pass.apply(gm.graph)
            gm.graph.lint()
            gm.recompile()

            assert match_count == 1
            hop_nodes = self._hop_nodes(gm.graph)
            assert len(hop_nodes) == 1
            hop_node = hop_nodes[0]

            for val in hop_node.meta["val"]:
                assert all(isinstance(s, torch.SymInt) for s in val.shape)

            # raw_silu_mul returns empty_like(x), so output shape == input shape
            input_node = next(n for n in gm.graph.nodes if n.op == "placeholder")
            input_shape = input_node.meta["val"].shape
            output_shape = hop_node.meta["val"][0].shape
            assert len(output_shape) == len(input_shape)
            for out_s, in_s in zip(output_shape, input_shape):
                assert out_s == in_s