# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

4
import pytest
5
import os
6
7

from .conftest import run_equality_correctness_test
8
from ...utils import models_path_prefix
9

# Main model: checkpoint path resolved under the shared models_path_prefix.
MAIN_MODEL = os.path.join(models_path_prefix, "JackFram/llama-68m")

# Speculative model: resolved under the same prefix as the main model.
SPEC_MODEL = os.path.join(models_path_prefix, "JackFram/llama-160m")


@pytest.mark.parametrize(
    "common_llm_kwargs",
    [{
        # Reuse the module-level constants so the model paths are defined in
        # exactly one place.
        "model_name": MAIN_MODEL,

        # Skip cuda graph recording for fast test.
        "enforce_eager": True,

        # speculative config
        "speculative_config": {
            "model": SPEC_MODEL,
            "num_speculative_tokens": 3,
        },
    }])
@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}])
@pytest.mark.parametrize("baseline_llm_kwargs", [{"seed": 1}])
@pytest.mark.parametrize("test_llm_kwargs", [{"seed": 5}])
@pytest.mark.parametrize("batch_size", [1, 8, 32])
@pytest.mark.parametrize("temperature", [0.1, 1.0])
@pytest.mark.parametrize(
    "output_len",
    [
        # Use smaller output len for fast test.
        20,
    ])
def test_seeded_consistency(vllm_runner, common_llm_kwargs,
                            per_test_common_llm_kwargs, baseline_llm_kwargs,
                            test_llm_kwargs, batch_size: int,
                            temperature: float, output_len: int) -> None:
    """Verify seeded runs match, and that unseeded runs do not.

    The baseline and test LLMs are parametrized with different ``seed``
    kwargs (1 and 5). The equality check is run twice through
    ``run_equality_correctness_test``:

    1. With ``disable_seed=False`` the check must pass.
    2. With ``disable_seed=True`` the same check must raise
       ``AssertionError``, which guards against the first check passing
       trivially.

    NOTE(review): ``run_equality_correctness_test`` lives in conftest and is
    not visible here; presumably ``disable_seed`` toggles per-request seeds
    (see the inline comment below) -- confirm against the helper.
    """
    run_equality_correctness_test(
        vllm_runner,
        common_llm_kwargs,
        per_test_common_llm_kwargs,
        baseline_llm_kwargs,
        test_llm_kwargs,
        batch_size,
        max_output_len=output_len,
        temperature=temperature,
        disable_seed=False,
    )

    # Ensure this same test does fail if we _don't_ include per-request seeds
    with pytest.raises(AssertionError):
        run_equality_correctness_test(
            vllm_runner,
            common_llm_kwargs,
            per_test_common_llm_kwargs,
            baseline_llm_kwargs,
            test_llm_kwargs,
            batch_size,
            max_output_len=output_len,
            temperature=temperature,
            disable_seed=True,
        )