test_config.py 19 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import os
5
from dataclasses import MISSING, Field, asdict, dataclass, field
6
from unittest.mock import patch
7

8
9
import pytest

10
from vllm.compilation.backends import VllmBackend
11
from vllm.config import ModelConfig, PoolerConfig, VllmConfig, update_config
12
from vllm.config.load import LoadConfig
13
from vllm.config.utils import get_field
14
15
from vllm.model_executor.layers.pooler import PoolingType
from vllm.platforms import current_platform
16

17

18
19
20
21
22
23
24
25
def test_compile_config_repr_succeeds():
    """Check that ``repr()`` of a ``VllmConfig`` works after backend setup.

    The resulting string must mention both ``VllmConfig`` and
    ``inductor_passes``.
    """
    # setup: VllmBackend mutates the config object
    config = VllmConfig()
    backend = VllmBackend(config)
    backend.configure_post_pass()

    # test that repr(config) succeeds
    val = repr(config)
    assert "VllmConfig" in val
    assert "inductor_passes" in val
28
29


30
31
32
33
34
@dataclass
class _TestConfigFields:
    """Small dataclass fixture covering the three kinds of field defaults."""

    # Required: no default value and no default factory.
    a: int
    # Optional: a fresh dict is built per instance via the factory.
    b: dict = field(default_factory=dict)
    # Optional: plain default value.
    c: str = field(default="default")
35
36


37
def test_get_field():
    """Check ``get_field`` on fields with and without defaults."""
    # "a" declares neither a default nor a default factory.
    with pytest.raises(ValueError):
        get_field(_TestConfigFields, "a")

    # "b" is declared with a default factory only.
    b = get_field(_TestConfigFields, "b")
    assert isinstance(b, Field)
    assert b.default is MISSING
    assert b.default_factory is dict

    # "c" is declared with a plain default only.
    c = get_field(_TestConfigFields, "c")
    assert isinstance(c, Field)
    assert c.default == "default"
    assert c.default_factory is MISSING


52
53
@dataclass
class _TestNestedConfig:
    """Dataclass fixture whose only field is itself a dataclass."""

    # Each instance gets its own inner config, built with a=0.
    a: _TestConfigFields = field(default_factory=lambda: _TestConfigFields(a=0))
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78


def test_update_config():
    """Exercise ``update_config`` with flat, nested and invalid updates."""
    # Simple update
    flat = _TestConfigFields(a=0)
    flat_updated = update_config(flat, {"a": 42})
    assert flat_updated.a == 42

    # Nonexistent field
    with pytest.raises(AssertionError):
        update_config(flat, {"nonexistent": 1})

    # Nested update with dataclass
    replacement = _TestConfigFields(a=1, c="new_value")
    nested_updated = update_config(_TestNestedConfig(), {"a": replacement})
    assert nested_updated.a == replacement

    # Nested update with dict
    nested = _TestNestedConfig()
    nested_from_dict = update_config(nested, {"a": {"c": "new_value"}})
    assert nested_from_dict.a.c == "new_value"

    # Nested update with invalid type
    with pytest.raises(AssertionError):
        update_config(nested, {"a": "new_value"})


79
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "generate", "none", "generate"),
        ("intfloat/multilingual-e5-small", "pooling", "none", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none", "reward"),
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_auto_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="auto"``."""
    # NOTE: expected_task is parametrized but not asserted by this test.
    config = ModelConfig(model_id, task="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
98

99

100
101
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "pooling", "embed", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "embed", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "classify", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "embed", "embed"),
        ("openai/whisper-small", "pooling", "embed", "embed"),
    ],
)
def test_score_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="score"``."""
    # NOTE: expected_task is parametrized but not asserted by this test.
    config = ModelConfig(model_id, task="score")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type


# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_transcription_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types for ``task="transcription"``."""
    # NOTE: expected_task is parametrized but not asserted by this test.
    config = ModelConfig(model_id, task="transcription")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
135
136


137
138
139
140
141
142
143
144
145
146
147
148
149
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "generate", "none"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "generate", "none"),
    ],
)
def test_auto_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="auto"``."""
    config = ModelConfig(model_id, runner="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
153
154
155


@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "pooling", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "pooling", "embed"),
    ],
)
def test_pooling_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="pooling"``."""
    config = ModelConfig(model_id, runner="pooling")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
171
172


173
174
175
176
177
178
179
180
181
182
183
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [("Qwen/Qwen2.5-1.5B-Instruct", "draft", "none")],
)
def test_draft_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="draft"``."""
    model_config = ModelConfig(model_id, runner="draft")

    assert model_config.runner_type == expected_runner_type
    assert model_config.convert_type == expected_convert_type
184
185


186
187
188
189
190
191
192
193
194
195
# (model id, max_model_len expected when sliding window is disabled)
MODEL_IDS_EXPECTED = [
    ("Qwen/Qwen1.5-7B", 32768),
    ("mistralai/Mistral-7B-v0.1", 4096),
    ("mistralai/Mistral-7B-Instruct-v0.2", 32768),
]


@pytest.mark.parametrize("model_id_expected", MODEL_IDS_EXPECTED)
def test_disable_sliding_window(model_id_expected):
    """Check ``max_model_len`` when ``disable_sliding_window=True``."""
    model_id, expected = model_id_expected
    model_config = ModelConfig(model_id, disable_sliding_window=True)
    assert model_config.max_model_len == expected

199

200
201
202
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config():
    """Check the pooler config that ``ModelConfig`` builds for this model.

    No ``pooler_config`` is passed in; the resulting config must exist,
    have ``normalize`` set and use MEAN pooling.
    """
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    model_config = ModelConfig(model_id)

    assert model_config.pooler_config is not None
    assert model_config.pooler_config.normalize
    assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name
210
211


212
213
214
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config_from_args():
    """Check that an explicitly passed ``PoolerConfig`` is kept as given."""
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    pooler_config = PoolerConfig(pooling_type="CLS", normalize=True)
    model_config = ModelConfig(model_id, pooler_config=pooler_config)

    # Compare field by field rather than by object identity.
    assert asdict(model_config.pooler_config) == asdict(pooler_config)
221
222


223
224
225
226
227
228
@pytest.mark.parametrize(
    ("model_id", "default_pooling_type", "pooling_type"),
    [
        ("tomaarsen/Qwen3-Reranker-0.6B-seq-cls", "LAST", "LAST"),  # LLM
        ("intfloat/e5-small", "CLS", "MEAN"),  # BertModel
        ("Qwen/Qwen2.5-Math-RM-72B", "ALL", "ALL"),  # reward
        ("Qwen/Qwen2.5-Math-PRM-7B", "STEP", "STEP"),  # step reward
    ],
)
def test_default_pooling_type(model_id, default_pooling_type, pooling_type):
    """Check the default pooling type and the one the pooler config uses.

    ``default_pooling_type`` is compared against the model info, while
    ``pooling_type`` is compared against the resolved pooler config; the two
    differ for some of the parametrized models.
    """
    model_config = ModelConfig(model_id)
    assert model_config._model_info.default_pooling_type == default_pooling_type
    assert model_config.pooler_config.pooling_type == pooling_type


238
239
240
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_bert_tokenization_sentence_transformer_config():
    """Check the encoder config values reported for ``BAAI/bge-base-en-v1.5``."""
    model_id = "BAAI/bge-base-en-v1.5"
    bge_model_config = ModelConfig(model_id)

    bert_bge_model_config = bge_model_config._get_encoder_config()

    assert bert_bge_model_config["max_seq_length"] == 512
    assert bert_bge_model_config["do_lower_case"]


251
def test_rope_customization():
    """Check ``rope_parameters`` and ``max_model_len`` with and without overrides.

    Each of the two models is loaded once without overrides and once with
    ``rope_parameters`` replaced through ``hf_overrides``.
    """
    TEST_ROPE_PARAMETERS = {
        "rope_theta": 16_000_000.0,
        "rope_type": "dynamic",
        "factor": 2.0,
    }
    LLAMA_ROPE_PARAMETERS = {"rope_theta": 500000.0, "rope_type": "default"}
    LONGCHAT_ROPE_PARAMETERS = {"rope_type": "linear", "factor": 8.0}

    # Llama without overrides.
    llama_model_config = ModelConfig("meta-llama/Meta-Llama-3-8B-Instruct")
    assert (
        getattr(llama_model_config.hf_config, "rope_parameters", None)
        == LLAMA_ROPE_PARAMETERS
    )
    assert llama_model_config.max_model_len == 8192

    # Llama with rope_parameters overridden.
    llama_model_config = ModelConfig(
        "meta-llama/Meta-Llama-3-8B-Instruct",
        hf_overrides={"rope_parameters": TEST_ROPE_PARAMETERS},
    )
    assert (
        getattr(llama_model_config.hf_config, "rope_parameters", None)
        == TEST_ROPE_PARAMETERS
    )
    assert llama_model_config.max_model_len == 16384

    # Longchat without overrides.
    longchat_model_config = ModelConfig("lmsys/longchat-13b-16k")
    # Check if LONGCHAT_ROPE_PARAMETERS entries are in longchat_model_config
    assert all(
        longchat_model_config.hf_config.rope_parameters.get(key) == value
        for key, value in LONGCHAT_ROPE_PARAMETERS.items()
    )
    assert longchat_model_config.max_model_len == 16384

    # Longchat with rope_parameters overridden.
    longchat_model_config = ModelConfig(
        "lmsys/longchat-13b-16k",
        hf_overrides={
            "rope_parameters": TEST_ROPE_PARAMETERS,
        },
    )
    assert (
        getattr(longchat_model_config.hf_config, "rope_parameters", None)
        == TEST_ROPE_PARAMETERS
    )
    assert longchat_model_config.max_model_len == 4096
296
297


298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def test_nested_hf_overrides():
    """Verify that dict-valued ``hf_overrides`` reach the nested sub-configs."""
    model_id = "Qwen/Qwen2-VL-2B-Instruct"

    # Override a single value inside text_config.
    single_override = {"text_config": {"hidden_size": 1024}}
    model_config = ModelConfig(model_id, hf_overrides=single_override)
    assert model_config.hf_config.text_config.hidden_size == 1024

    # Override several values across two sub-configs at once.
    multi_override = {
        "text_config": {"hidden_size": 2048, "num_attention_heads": 16},
        "vision_config": {"hidden_size": 512},
    }
    model_config = ModelConfig(model_id, hf_overrides=multi_override)
    hf_config = model_config.hf_config
    assert hf_config.text_config.hidden_size == 2048
    assert hf_config.text_config.num_attention_heads == 16
    assert hf_config.vision_config.hidden_size == 512


329
330
331
332
333
334
335
336
337
338
339
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Encoder Decoder models not supported on ROCm."
)
@pytest.mark.parametrize(
    ("model_id", "is_encoder_decoder"),
    [
        ("facebook/opt-125m", False),
        ("openai/whisper-tiny", True),
        ("meta-llama/Llama-3.2-1B-Instruct", False),
    ],
)
def test_is_encoder_decoder(model_id, is_encoder_decoder):
    """Check ``ModelConfig.is_encoder_decoder`` for each parametrized model."""
    config = ModelConfig(model_id)

    assert config.is_encoder_decoder == is_encoder_decoder


346
347
348
349
350
351
352
@pytest.mark.parametrize(
    ("model_id", "uses_mrope"),
    [
        ("facebook/opt-125m", False),
        ("Qwen/Qwen2-VL-2B-Instruct", True),
    ],
)
def test_uses_mrope(model_id, uses_mrope):
    """Check ``ModelConfig.uses_mrope`` for each parametrized model."""
    config = ModelConfig(model_id)

    assert config.uses_mrope == uses_mrope
357
358
359
360
361


def test_generation_config_loading():
    """Check ``get_diff_sampling_param`` for each generation-config mode.

    Covers ``generation_config`` set to ``"vllm"`` and ``"auto"``, each with
    and without ``override_generation_config``.
    """
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"

    # When set generation_config to "vllm", the default generation config
    # will not be loaded.
    model_config = ModelConfig(model_id, generation_config="vllm")
    assert model_config.get_diff_sampling_param() == {}

    # When set generation_config to "auto", the default generation config
    # should be loaded.
    model_config = ModelConfig(model_id, generation_config="auto")

    correct_generation_config = {
        "repetition_penalty": 1.1,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
    }

    assert model_config.get_diff_sampling_param() == correct_generation_config

    # The generation config could be overridden by the user.
    override_generation_config = {"temperature": 0.5, "top_k": 5}

    model_config = ModelConfig(
        model_id,
        generation_config="auto",
        override_generation_config=override_generation_config,
    )

    # Expected result: the loaded defaults with the user's values on top.
    override_result = correct_generation_config.copy()
    override_result.update(override_generation_config)

    assert model_config.get_diff_sampling_param() == override_result

    # When generation_config is set to "vllm" and override_generation_config
    # is set, the override_generation_config should be used directly.
    model_config = ModelConfig(
        model_id,
        generation_config="vllm",
        override_generation_config=override_generation_config,
    )

    assert model_config.get_diff_sampling_param() == override_generation_config
403
404


405
406
407
408
409
410
411
@pytest.mark.parametrize(
    "pt_load_map_location",
    [
        "cuda",
        {"": "cuda"},
    ],
)
def test_load_config_pt_load_map_location(pt_load_map_location):
    """Check that ``pt_load_map_location`` survives wrapping in ``VllmConfig``.

    Both the string form and the dict form are parametrized.
    """
    load_config = LoadConfig(pt_load_map_location=pt_load_map_location)
    config = VllmConfig(load_config=load_config)

    assert config.load_config.pt_load_map_location == pt_load_map_location
417
418
419


@pytest.mark.parametrize(
    ("model_id", "max_model_len", "expected_max_len", "should_raise"),
    [
        ("BAAI/bge-reranker-base", None, 512, False),
        ("BAAI/bge-reranker-base", 256, 256, False),
        ("BAAI/bge-reranker-base", 513, 512, True),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", None, 131072, False),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", 131073, 131072, True),
    ],
)
def test_get_and_verify_max_len(
    model_id, max_model_len, expected_max_len, should_raise
):
    """Test get_and_verify_max_len with different configurations."""
    model_config = ModelConfig(model_id)

    if should_raise:
        # expected_max_len is not checked for these cases; only the
        # ValueError is.
        with pytest.raises(ValueError):
            model_config.get_and_verify_max_len(max_model_len)
    else:
        actual_max_len = model_config.get_and_verify_max_len(max_model_len)
        assert actual_max_len == expected_max_len
441
442
443
444
445
446
447
448
449
450
451


class MockConfig:
    """Minimal stand-in object passed to maybe_pull_model_tokenizer_for_runai.

    Holds only the ``model``, ``tokenizer`` and ``model_weights`` attributes.
    """

    def __init__(self, model: str, tokenizer: str):
        # model_weights always starts out unset.
        self.model_weights = None
        self.model, self.tokenizer = model, tokenizer


452
453
454
455
456
457
458
459
@pytest.mark.parametrize(
    "s3_url",
    [
        "s3://example-bucket-1/model/",
        "s3://example-bucket-2/model/",
    ],
)
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
    """Test that S3 URLs create deterministic local directories for model and
    tokenizer.

    The same URL is resolved twice on two separate config objects; both runs
    must yield existing directories and identical paths.
    """
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    # Create first mock and run the method
    config1 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)

    # Check that model and tokenizer point to existing directories
    assert os.path.exists(config1.model), (
        f"Model directory does not exist: {config1.model}"
    )
    assert os.path.isdir(config1.model), (
        f"Model path is not a directory: {config1.model}"
    )
    assert os.path.exists(config1.tokenizer), (
        f"Tokenizer directory does not exist: {config1.tokenizer}"
    )
    assert os.path.isdir(config1.tokenizer), (
        f"Tokenizer path is not a directory: {config1.tokenizer}"
    )

    # Verify that the paths are different from the original S3 URL
    assert config1.model != s3_url, "Model path should be converted to local directory"
    assert config1.tokenizer != s3_url, (
        "Tokenizer path should be converted to local directory"
    )

    # Store the original paths
    created_model_dir = config1.model
    created_tokenizer_dir = config1.tokenizer

    # Create a new mock and run the method with the same S3 URL
    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)

    # Check that the new directories exist
    assert os.path.exists(config2.model), (
        f"Model directory does not exist: {config2.model}"
    )
    assert os.path.isdir(config2.model), (
        f"Model path is not a directory: {config2.model}"
    )
    assert os.path.exists(config2.tokenizer), (
        f"Tokenizer directory does not exist: {config2.tokenizer}"
    )
    assert os.path.isdir(config2.tokenizer), (
        f"Tokenizer path is not a directory: {config2.tokenizer}"
    )

    # Verify that the paths are deterministic (same as before)
    assert config2.model == created_model_dir, (
        f"Model paths are not deterministic. "
        f"Original: {created_model_dir}, New: {config2.model}"
    )
    assert config2.tokenizer == created_tokenizer_dir, (
        f"Tokenizer paths are not deterministic. "
        f"Original: {created_tokenizer_dir}, New: {config2.tokenizer}"
    )
521
522


523
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_different_models_create_different_directories(mock_pull_files):
    """Test that different S3 URLs create different local directories."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    s3_url1 = "s3://example-bucket-1/model/"
    s3_url2 = "s3://example-bucket-2/model/"

    # Create mocks with different S3 URLs and run the method
    config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)

    config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)

    # Verify that different URLs produce different directories
    assert config1.model != config2.model, (
        f"Different S3 URLs should create different model directories. "
        f"URL1 model: {config1.model}, URL2 model: {config2.model}"
    )
    assert config1.tokenizer != config2.tokenizer, (
        f"Different S3 URLs should create different tokenizer directories. "
        f"URL1 tokenizer: {config1.tokenizer}, "
        f"URL2 tokenizer: {config2.tokenizer}"
    )

    # Verify that both sets of directories exist
    assert os.path.exists(config1.model) and os.path.isdir(config1.model)
    assert os.path.exists(config1.tokenizer) and os.path.isdir(config1.tokenizer)
    assert os.path.exists(config2.model) and os.path.isdir(config2.model)
    assert os.path.exists(config2.tokenizer) and os.path.isdir(config2.tokenizer)