# test_config.py 18.1 KB
# Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import os
5
from dataclasses import MISSING, Field, asdict, dataclass, field
6
from unittest.mock import patch
7

8
9
import pytest

10
from vllm.compilation.backends import VllmBackend
11
from vllm.config import ModelConfig, PoolerConfig, VllmConfig, update_config
12
from vllm.config.load import LoadConfig
13
from vllm.config.utils import get_field
14
15
from vllm.model_executor.layers.pooler import PoolingType
from vllm.platforms import current_platform
16

17

18
19
20
21
22
23
24
25
def test_compile_config_repr_succeeds():
    """Check that ``repr(VllmConfig)`` works after the backend has touched it."""
    # setup: VllmBackend mutates the config object
    config = VllmConfig()
    backend = VllmBackend(config)
    backend.configure_post_pass()

    # test that repr(config) succeeds
    val = repr(config)
    assert "VllmConfig" in val
    assert "inductor_passes" in val
28
29


30
31
32
33
34
@dataclass
class _TestConfigFields:
    """Small dataclass fixture with one field of each default flavour."""

    a: int  # required: neither a default nor a default_factory
    b: dict = field(default_factory=dict)  # default supplied through a factory
    c: str = "default"  # plain default value
35
36


37
def test_get_field():
    """Exercise ``get_field`` on fields with no default, a factory and a value."""
    # A field with neither a default nor a default_factory raises ValueError.
    with pytest.raises(ValueError):
        get_field(_TestConfigFields, "a")

    # Factory-backed field: only default_factory is populated.
    b = get_field(_TestConfigFields, "b")
    assert isinstance(b, Field)
    assert b.default is MISSING
    assert b.default_factory is dict

    # Value-backed field: only default is populated.
    c = get_field(_TestConfigFields, "c")
    assert isinstance(c, Field)
    assert c.default == "default"
    assert c.default_factory is MISSING


52
53
@dataclass
class _TestNestedConfig:
    """Dataclass fixture whose only field is itself a dataclass."""

    # Built through a factory, so each instance gets a fresh inner config.
    a: _TestConfigFields = field(default_factory=lambda: _TestConfigFields(a=0))
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78


def test_update_config():
    """Cover flat, unknown-key, nested and wrongly-typed ``update_config`` calls."""
    # Flat override of a single field.
    flat = _TestConfigFields(a=0)
    assert update_config(flat, {"a": 42}).a == 42

    # A key that is not a field of the config trips an assertion.
    with pytest.raises(AssertionError):
        update_config(flat, {"nonexistent": 1})

    # Nested field replaced wholesale by another dataclass instance.
    replacement = _TestConfigFields(a=1, c="new_value")
    swapped = update_config(_TestNestedConfig(), {"a": replacement})
    assert swapped.a == replacement

    # Nested field patched through a plain dict.
    nested = _TestNestedConfig()
    patched = update_config(nested, {"a": {"c": "new_value"}})
    assert patched.a.c == "new_value"

    # A plain string for the nested field is rejected with an assertion.
    with pytest.raises(AssertionError):
        update_config(nested, {"a": "new_value"})


79
# Can remove once --task option is fully deprecated
80
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "generate", "none", "generate"),
        ("intfloat/multilingual-e5-small", "pooling", "none", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none", "reward"),
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_auto_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="auto"``."""
    # NOTE: expected_task is part of the parametrization but is not asserted here.
    config = ModelConfig(model_id, task="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
98

99

100
101
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "pooling", "embed", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "embed", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "classify", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "embed", "embed"),
        ("openai/whisper-small", "pooling", "embed", "embed"),
    ],
)
def test_score_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="score"``."""
    # NOTE: expected_task is part of the parametrization but is not asserted here.
    config = ModelConfig(model_id, task="score")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type


# Can remove once --task option is fully deprecated
122
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_transcription_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="transcription"``."""
    # NOTE: expected_task is part of the parametrization but is not asserted here.
    config = ModelConfig(model_id, task="transcription")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
135
136


137
138
139
140
141
142
143
144
145
146
147
148
149
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "generate", "none"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "generate", "none"),
    ],
)
def test_auto_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="auto"``."""
    config = ModelConfig(model_id, runner="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
153
154
155


@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "pooling", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "pooling", "embed"),
    ],
)
def test_pooling_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="pooling"``."""
    config = ModelConfig(model_id, runner="pooling")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
171
172


173
174
175
176
177
178
179
180
181
182
183
@pytest.mark.parametrize(
    "model_id,expected_runner_type,expected_convert_type",
    [("Qwen/Qwen2.5-1.5B-Instruct", "draft", "none")],
)
def test_draft_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="draft"``."""
    model_config = ModelConfig(model_id, runner="draft")
    resolved = (model_config.runner_type, model_config.convert_type)

    assert resolved[0] == expected_runner_type
    assert resolved[1] == expected_convert_type
184
185


186
187
188
189
190
191
192
193
194
195
# (model id, expected max_model_len) pairs consumed by
# test_disable_sliding_window, which builds each config with
# disable_sliding_window=True.
MODEL_IDS_EXPECTED = [
    ("Qwen/Qwen1.5-7B", 32768),
    ("mistralai/Mistral-7B-v0.1", 4096),
    ("mistralai/Mistral-7B-Instruct-v0.2", 32768),
]


@pytest.mark.parametrize("model_id_expected", MODEL_IDS_EXPECTED)
def test_disable_sliding_window(model_id_expected):
    """Check ``max_model_len`` when the config disables the sliding window."""
    model_id, expected = model_id_expected
    model_config = ModelConfig(model_id, disable_sliding_window=True)
    assert model_config.max_model_len == expected

199

200
201
202
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config():
    """Check the pooler config populated for a sentence-transformers model."""
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    model_config = ModelConfig(model_id)

    assert model_config.pooler_config is not None
    assert model_config.pooler_config.normalize
    assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name
210
211


212
213
214
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config_from_args():
    """Check that an explicitly passed ``PoolerConfig`` is kept unchanged."""
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    pooler_config = PoolerConfig(pooling_type="CLS", normalize=True)
    model_config = ModelConfig(model_id, pooler_config=pooler_config)

    # Compare field by field rather than by identity.
    assert asdict(model_config.pooler_config) == asdict(pooler_config)
221
222


223
224
225
226
227
228
@pytest.mark.parametrize(
    ("model_id", "default_pooling_type", "pooling_type"),
    [
        ("tomaarsen/Qwen3-Reranker-0.6B-seq-cls", "LAST", "LAST"),  # LLM
        ("intfloat/e5-small", "CLS", "MEAN"),  # BertModel
        ("Qwen/Qwen2.5-Math-RM-72B", "ALL", "ALL"),  # reward
        ("Qwen/Qwen2.5-Math-PRM-7B", "STEP", "STEP"),  # step reward
    ],
)
def test_default_pooling_type(model_id, default_pooling_type, pooling_type):
    """Check the model-info default pooling type and the resolved pooler type."""
    model_config = ModelConfig(model_id)
    # The two can differ (see the intfloat/e5-small row), so assert each one.
    assert model_config._model_info.default_pooling_type == default_pooling_type
    assert model_config.pooler_config.pooling_type == pooling_type


238
239
240
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_bert_tokenization_sentence_transformer_config():
    """Check the encoder config values read for ``BAAI/bge-base-en-v1.5``."""
    model_id = "BAAI/bge-base-en-v1.5"
    bge_model_config = ModelConfig(model_id)

    bert_bge_model_config = bge_model_config._get_encoder_config()

    assert bert_bge_model_config["max_seq_length"] == 512
    assert bert_bge_model_config["do_lower_case"]


251
def test_rope_customization():
    """Check rope_scaling / rope_theta defaults and ``hf_overrides`` handling."""
    TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
    TEST_ROPE_THETA = 16_000_000.0
    LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}

    # Llama 3 without overrides.
    llama_model_config = ModelConfig("meta-llama/Meta-Llama-3-8B-Instruct")
    assert getattr(llama_model_config.hf_config, "rope_scaling", None) is None
    assert getattr(llama_model_config.hf_config, "rope_theta", None) == 500_000
    assert llama_model_config.max_model_len == 8192

    # Llama 3 with both rope values overridden.
    llama_model_config = ModelConfig(
        "meta-llama/Meta-Llama-3-8B-Instruct",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
            "rope_theta": TEST_ROPE_THETA,
        },
    )
    assert (
        getattr(llama_model_config.hf_config, "rope_scaling", None) == TEST_ROPE_SCALING
    )
    assert getattr(llama_model_config.hf_config, "rope_theta", None) == TEST_ROPE_THETA
    assert llama_model_config.max_model_len == 16384

    # LongChat without overrides.
    longchat_model_config = ModelConfig("lmsys/longchat-13b-16k")
    # Check if LONGCHAT_ROPE_SCALING entries are in longchat_model_config
    assert all(
        longchat_model_config.hf_config.rope_scaling.get(key) == value
        for key, value in LONGCHAT_ROPE_SCALING.items()
    )
    assert longchat_model_config.max_model_len == 16384

    # LongChat with only rope_scaling overridden.
    longchat_model_config = ModelConfig(
        "lmsys/longchat-13b-16k",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
        },
    )
    assert (
        getattr(longchat_model_config.hf_config, "rope_scaling", None)
        == TEST_ROPE_SCALING
    )
    assert longchat_model_config.max_model_len == 4096
293
294


295
296
297
298
299
300
301
302
303
304
305
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Encoder Decoder models not supported on ROCm."
)
@pytest.mark.parametrize(
    ("model_id", "is_encoder_decoder"),
    [
        ("facebook/opt-125m", False),
        ("openai/whisper-tiny", True),
        ("meta-llama/Llama-3.2-1B-Instruct", False),
    ],
)
def test_is_encoder_decoder(model_id, is_encoder_decoder):
    """Check ``ModelConfig.is_encoder_decoder`` for each listed model."""
    config = ModelConfig(model_id)

    assert config.is_encoder_decoder == is_encoder_decoder


312
313
314
315
316
317
318
@pytest.mark.parametrize(
    ("model_id", "uses_mrope"),
    [
        ("facebook/opt-125m", False),
        ("Qwen/Qwen2-VL-2B-Instruct", True),
    ],
)
def test_uses_mrope(model_id, uses_mrope):
    """Check ``ModelConfig.uses_mrope`` for each listed model."""
    config = ModelConfig(model_id)

    assert config.uses_mrope == uses_mrope
323
324
325
326
327


def test_generation_config_loading():
    """Check ``get_diff_sampling_param`` for "vllm"/"auto" configs and overrides."""
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"

    # When set generation_config to "vllm", the default generation config
    # will not be loaded.
    model_config = ModelConfig(model_id, generation_config="vllm")
    assert model_config.get_diff_sampling_param() == {}

    # When set generation_config to "auto", the default generation config
    # should be loaded.
    model_config = ModelConfig(model_id, generation_config="auto")

    correct_generation_config = {
        "repetition_penalty": 1.1,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
    }

    assert model_config.get_diff_sampling_param() == correct_generation_config

    # The generation config could be overridden by the user.
    override_generation_config = {"temperature": 0.5, "top_k": 5}

    model_config = ModelConfig(
        model_id,
        generation_config="auto",
        override_generation_config=override_generation_config,
    )

    # Expected result: loaded defaults with the user overrides layered on top.
    override_result = correct_generation_config.copy()
    override_result.update(override_generation_config)

    assert model_config.get_diff_sampling_param() == override_result

    # When generation_config is set to "vllm" and override_generation_config
    # is set, the override_generation_config should be used directly.
    model_config = ModelConfig(
        model_id,
        generation_config="vllm",
        override_generation_config=override_generation_config,
    )

    assert model_config.get_diff_sampling_param() == override_generation_config
369
370


371
372
373
374
375
376
377
@pytest.mark.parametrize(
    "pt_load_map_location",
    [
        "cuda",
        {"": "cuda"},
    ],
)
def test_load_config_pt_load_map_location(pt_load_map_location):
    """Check that ``pt_load_map_location`` survives wrapping in ``VllmConfig``."""
    load_config = LoadConfig(pt_load_map_location=pt_load_map_location)
    config = VllmConfig(load_config=load_config)

    assert config.load_config.pt_load_map_location == pt_load_map_location
383
384
385


@pytest.mark.parametrize(
    ("model_id", "max_model_len", "expected_max_len", "should_raise"),
    [
        ("BAAI/bge-reranker-base", None, 512, False),
        ("BAAI/bge-reranker-base", 256, 256, False),
        ("BAAI/bge-reranker-base", 513, 512, True),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", None, 131072, False),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", 131073, 131072, True),
    ],
)
def test_get_and_verify_max_len(
    model_id, max_model_len, expected_max_len, should_raise
):
    """Test get_and_verify_max_len with different configurations."""
    model_config = ModelConfig(model_id)

    if should_raise:
        # expected_max_len is not checked on this path; only the error matters.
        with pytest.raises(ValueError):
            model_config.get_and_verify_max_len(max_model_len)
    else:
        actual_max_len = model_config.get_and_verify_max_len(max_model_len)
        assert actual_max_len == expected_max_len
407
408
409
410
411
412
413
414
415
416
417


class MockConfig:
    """Bare stand-in passed as ``self`` to maybe_pull_model_tokenizer_for_runai.

    Holds only the ``model``, ``tokenizer`` and ``model_weights`` attributes.
    """

    def __init__(self, model: str, tokenizer: str):
        # model_weights always starts out unset.
        self.model_weights = None
        self.model, self.tokenizer = model, tokenizer


418
419
420
421
422
423
424
425
@pytest.mark.parametrize(
    "s3_url",
    [
        "s3://example-bucket-1/model/",
        "s3://example-bucket-2/model/",
    ],
)
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
    """Test that S3 URLs create deterministic local directories for model and
    tokenizer."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    # Create first mock and run the method
    config1 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)

    # Check that model and tokenizer point to existing directories
    assert os.path.exists(config1.model), (
        f"Model directory does not exist: {config1.model}"
    )
    assert os.path.isdir(config1.model), (
        f"Model path is not a directory: {config1.model}"
    )
    assert os.path.exists(config1.tokenizer), (
        f"Tokenizer directory does not exist: {config1.tokenizer}"
    )
    assert os.path.isdir(config1.tokenizer), (
        f"Tokenizer path is not a directory: {config1.tokenizer}"
    )

    # Verify that the paths are different from the original S3 URL
    assert config1.model != s3_url, "Model path should be converted to local directory"
    assert config1.tokenizer != s3_url, (
        "Tokenizer path should be converted to local directory"
    )

    # Store the original paths
    created_model_dir = config1.model
    created_tokenizer_dir = config1.tokenizer

    # Create a new mock and run the method with the same S3 URL
    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)

    # Check that the new directories exist
    assert os.path.exists(config2.model), (
        f"Model directory does not exist: {config2.model}"
    )
    assert os.path.isdir(config2.model), (
        f"Model path is not a directory: {config2.model}"
    )
    assert os.path.exists(config2.tokenizer), (
        f"Tokenizer directory does not exist: {config2.tokenizer}"
    )
    assert os.path.isdir(config2.tokenizer), (
        f"Tokenizer path is not a directory: {config2.tokenizer}"
    )

    # Verify that the paths are deterministic (same as before)
    assert config2.model == created_model_dir, (
        f"Model paths are not deterministic. "
        f"Original: {created_model_dir}, New: {config2.model}"
    )
    assert config2.tokenizer == created_tokenizer_dir, (
        f"Tokenizer paths are not deterministic. "
        f"Original: {created_tokenizer_dir}, New: {config2.tokenizer}"
    )
487
488


489
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_different_models_create_different_directories(mock_pull_files):
    """Test that different S3 URLs create different local directories."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    s3_url1 = "s3://example-bucket-1/model/"
    s3_url2 = "s3://example-bucket-2/model/"

    # Create mocks with different S3 URLs and run the method
    config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)

    config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)

    # Verify that different URLs produce different directories
    assert config1.model != config2.model, (
        f"Different S3 URLs should create different model directories. "
        f"URL1 model: {config1.model}, URL2 model: {config2.model}"
    )
    assert config1.tokenizer != config2.tokenizer, (
        f"Different S3 URLs should create different tokenizer directories. "
        f"URL1 tokenizer: {config1.tokenizer}, "
        f"URL2 tokenizer: {config2.tokenizer}"
    )

    # Verify that both sets of directories exist
    assert os.path.exists(config1.model) and os.path.isdir(config1.model)
    assert os.path.exists(config1.tokenizer) and os.path.isdir(config1.tokenizer)
    assert os.path.exists(config2.model) and os.path.isdir(config2.model)
    assert os.path.exists(config2.tokenizer) and os.path.isdir(config2.tokenizer)