# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
4
import pytest
5
from typing_extensions import LiteralString
6

zhuwenwen's avatar
zhuwenwen committed
7
from ..utils import compare_two_settings, create_new_process_for_each_test, models_path_prefix
8
9


10
11
12
@pytest.mark.parametrize(
    "PP_SIZE, MODEL_NAME",
    [
        (2, os.path.join(models_path_prefix, "JackFram/llama-160m")),
    ],
)
@pytest.mark.parametrize(
    "ATTN_BACKEND",
    [
        "FLASH_ATTN",
    ],
)
@create_new_process_for_each_test()
def test_pp_cudagraph(
    PP_SIZE: int,
    MODEL_NAME: str,
    ATTN_BACKEND: LiteralString,
) -> None:
    """Compare a pipeline-parallel run with and without ``--enforce-eager``.

    Two CLI argument lists are built that differ only in the trailing
    ``--enforce-eager`` flag, and both are handed to ``compare_two_settings``
    together with the model path.

    Args:
        PP_SIZE: Value passed to ``--pipeline-parallel-size``.
        MODEL_NAME: Model path/identifier forwarded to
            ``compare_two_settings``.
        ATTN_BACKEND: Value passed to ``--attention-backend``.

    NOTE(review): ``compare_two_settings`` is imported from ``..utils`` and is
    not visible here; presumably it runs the model under each argument list
    and checks that the results agree -- confirm against its definition.
    """
    cudagraph_args = [
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        "float16",
        "--pipeline-parallel-size",
        str(PP_SIZE),
        "--distributed-executor-backend",
        "mp",
        f"--attention-backend={ATTN_BACKEND}",
    ]

    # Same configuration plus the flag that forces eager execution; this is
    # the only difference between the two settings being compared.
    eager_args = cudagraph_args + ["--enforce-eager"]

    compare_two_settings(MODEL_NAME, eager_args, cudagraph_args)