# test_config.py 19 KB
# Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import os
5
from dataclasses import MISSING, Field, asdict, dataclass, field
6
from unittest.mock import patch
7

8
9
import pytest

10
from vllm.compilation.backends import VllmBackend
11
from vllm.config import ModelConfig, PoolerConfig, VllmConfig, update_config
12
from vllm.config.load import LoadConfig
13
from vllm.config.utils import get_field
14
15
from vllm.model_executor.layers.pooler import PoolingType
from vllm.platforms import current_platform
16

17

18
19
20
21
22
23
24
25
def test_compile_config_repr_succeeds():
    """Check that ``repr()`` of a ``VllmConfig`` works after backend setup.

    The repr is taken only after a ``VllmBackend`` has been built on the
    config and its post-pass configured, because the backend mutates the
    config object it is given.
    """
    # setup: VllmBackend mutates the config object
    config = VllmConfig()
    backend = VllmBackend(config)
    backend.configure_post_pass()

    # test that repr(config) succeeds
    val = repr(config)
    assert "VllmConfig" in val
    assert "inductor_passes" in val
28
29


30
31
32
33
34
@dataclass
class _TestConfigFields:
    # Fixture dataclass with one field per "default" flavour:
    #   a - required: neither a default nor a default factory
    #   b - default produced by a factory (a fresh dict per instance)
    #   c - plain default value
    a: int
    b: dict = field(default_factory=dict)
    c: str = field(default="default")
35
36


37
def test_get_field():
    """Check ``get_field`` against each field of ``_TestConfigFields``."""
    # "a" has neither a default nor a default factory; a ValueError is expected.
    with pytest.raises(ValueError):
        get_field(_TestConfigFields, "a")

    # "b" is declared with a default factory only.
    b = get_field(_TestConfigFields, "b")
    assert isinstance(b, Field)
    assert b.default is MISSING
    assert b.default_factory is dict

    # "c" is declared with a plain default only.
    c = get_field(_TestConfigFields, "c")
    assert isinstance(c, Field)
    assert c.default == "default"
    assert c.default_factory is MISSING


52
53
@dataclass
class _TestNestedConfig:
    # Fixture dataclass whose only field is itself a dataclass; the factory
    # builds a new _TestConfigFields(a=0) for every instance.
    a: _TestConfigFields = field(default_factory=lambda: _TestConfigFields(a=0))
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78


def test_update_config():
    """Exercise ``update_config`` on a flat and on a nested dataclass config."""
    # Flat config: override an existing field.
    flat = _TestConfigFields(a=0)
    updated_flat = update_config(flat, {"a": 42})
    assert updated_flat.a == 42

    # Flat config: a key that is not a field raises AssertionError.
    with pytest.raises(AssertionError):
        update_config(flat, {"nonexistent": 1})

    # Nested config: replace the inner config with a dataclass instance.
    nested_for_replace = _TestNestedConfig()
    replacement = _TestConfigFields(a=1, c="new_value")
    replaced = update_config(nested_for_replace, {"a": replacement})
    assert replaced.a == replacement

    # Nested config: update the inner config from a dict.
    nested = _TestNestedConfig()
    merged = update_config(nested, {"a": {"c": "new_value"}})
    assert merged.a.c == "new_value"

    # Nested config: a plain string is an invalid value for the inner config.
    with pytest.raises(AssertionError):
        update_config(nested, {"a": "new_value"})


79
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "generate", "none", "generate"),
        ("intfloat/multilingual-e5-small", "pooling", "none", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none", "reward"),
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_auto_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="auto"``.

    ``expected_task`` is part of the parametrization but is not asserted by
    this test.
    """
    config = ModelConfig(model_id, task="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
98

99

100
101
# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("distilbert/distilgpt2", "pooling", "embed", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "embed", "embed"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "classify", "classify"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "embed", "embed"),
        ("openai/whisper-small", "pooling", "embed", "embed"),
    ],
)
def test_score_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="score"``.

    ``expected_task`` is part of the parametrization but is not asserted by
    this test.
    """
    config = ModelConfig(model_id, task="score")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type


# Can remove once --task option is fully deprecated
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type", "expected_task"),
    [
        ("openai/whisper-small", "generate", "none", "transcription"),
    ],
)
def test_transcription_task(
    model_id, expected_runner_type, expected_convert_type, expected_task
):
    """Check the runner and convert types resolved for ``task="transcription"``.

    ``expected_task`` is part of the parametrization but is not asserted by
    this test.
    """
    config = ModelConfig(model_id, task="transcription")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
135
136


137
138
139
140
141
142
143
144
145
146
147
148
149
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "generate", "none"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "generate", "none"),
    ],
)
def test_auto_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="auto"``."""
    config = ModelConfig(model_id, runner="auto")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
153
154
155


@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [
        ("distilbert/distilgpt2", "pooling", "embed"),
        ("intfloat/multilingual-e5-small", "pooling", "none"),
        ("jason9693/Qwen2.5-1.5B-apeach", "pooling", "classify"),
        ("cross-encoder/ms-marco-MiniLM-L-6-v2", "pooling", "none"),
        ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "none"),
        ("openai/whisper-small", "pooling", "embed"),
    ],
)
def test_pooling_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="pooling"``."""
    config = ModelConfig(model_id, runner="pooling")

    assert config.runner_type == expected_runner_type
    assert config.convert_type == expected_convert_type
171
172


173
174
175
176
177
178
179
180
181
182
183
@pytest.mark.parametrize(
    ("model_id", "expected_runner_type", "expected_convert_type"),
    [("Qwen/Qwen2.5-1.5B-Instruct", "draft", "none")],
)
def test_draft_runner(model_id, expected_runner_type, expected_convert_type):
    """Check the runner and convert types resolved for ``runner="draft"``."""
    draft_config = ModelConfig(model_id, runner="draft")

    # Runner type is checked first, then the convert type.
    assert draft_config.runner_type == expected_runner_type
    assert draft_config.convert_type == expected_convert_type
184
185


186
187
188
189
190
191
192
193
194
195
# (model id, expected max_model_len) pairs for test_disable_sliding_window.
MODEL_IDS_EXPECTED = [
    ("Qwen/Qwen1.5-7B", 32768),
    ("mistralai/Mistral-7B-v0.1", 4096),
    ("mistralai/Mistral-7B-Instruct-v0.2", 32768),
]


@pytest.mark.parametrize("model_id_expected", MODEL_IDS_EXPECTED)
def test_disable_sliding_window(model_id_expected):
    """Check ``max_model_len`` for models built with
    ``disable_sliding_window=True``."""
    model_id, expected = model_id_expected
    model_config = ModelConfig(model_id, disable_sliding_window=True)
    assert model_config.max_model_len == expected

199

200
201
202
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config():
    """Check the pooler config resolved when none is passed explicitly.

    For the model used here the config must exist, have ``normalize`` set
    and use MEAN pooling.
    """
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    model_config = ModelConfig(model_id)

    assert model_config.pooler_config is not None
    assert model_config.pooler_config.normalize
    assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name
210
211


212
213
214
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_pooling_config_from_args():
    """Check that an explicitly passed ``PoolerConfig`` is kept as-is.

    The resulting ``pooler_config`` is compared to the passed one field by
    field via ``asdict``.
    """
    model_id = "sentence-transformers/all-MiniLM-L12-v2"
    pooler_config = PoolerConfig(pooling_type="CLS", normalize=True)
    model_config = ModelConfig(model_id, pooler_config=pooler_config)

    assert asdict(model_config.pooler_config) == asdict(pooler_config)
221
222


223
224
225
226
227
228
@pytest.mark.parametrize(
    ("model_id", "default_pooling_type", "pooling_type"),
    [
        ("tomaarsen/Qwen3-Reranker-0.6B-seq-cls", "LAST", "LAST"),  # LLM
        ("intfloat/e5-small", "CLS", "MEAN"),  # BertModel
        ("Qwen/Qwen2.5-Math-RM-72B", "ALL", "ALL"),  # reward
        ("Qwen/Qwen2.5-Math-PRM-7B", "STEP", "STEP"),  # step reward
    ],
)
def test_default_pooling_type(model_id, default_pooling_type, pooling_type):
    """Check both the model-info default pooling type and the pooling type
    that ends up on the resolved pooler config.

    The two can differ (see the ``intfloat/e5-small`` case).
    """
    model_config = ModelConfig(model_id)
    assert model_config._model_info.default_pooling_type == default_pooling_type
    assert model_config.pooler_config.pooling_type == pooling_type


238
239
240
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm."
)
def test_get_bert_tokenization_sentence_transformer_config():
    """Check the encoder config returned by ``_get_encoder_config`` for a BGE
    model: ``max_seq_length`` of 512 and a truthy ``do_lower_case``."""
    model_id = "BAAI/bge-base-en-v1.5"
    bge_model_config = ModelConfig(model_id)

    bert_bge_model_config = bge_model_config._get_encoder_config()

    assert bert_bge_model_config["max_seq_length"] == 512
    assert bert_bge_model_config["do_lower_case"]


251
def test_rope_customization():
    """Check that RoPE settings passed via ``hf_overrides`` reach ``hf_config``
    and that ``max_model_len`` matches the expected value in each case."""
    TEST_ROPE_SCALING = {"rope_type": "dynamic", "factor": 2.0}
    TEST_ROPE_THETA = 16_000_000.0
    LONGCHAT_ROPE_SCALING = {"rope_type": "linear", "factor": 8.0}

    # Llama 3 without overrides: no rope_scaling, stock rope_theta.
    llama_model_config = ModelConfig("meta-llama/Meta-Llama-3-8B-Instruct")
    assert getattr(llama_model_config.hf_config, "rope_scaling", None) is None
    assert getattr(llama_model_config.hf_config, "rope_theta", None) == 500_000
    assert llama_model_config.max_model_len == 8192

    # Llama 3 with both rope_scaling and rope_theta overridden.
    llama_model_config = ModelConfig(
        "meta-llama/Meta-Llama-3-8B-Instruct",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
            "rope_theta": TEST_ROPE_THETA,
        },
    )
    assert (
        getattr(llama_model_config.hf_config, "rope_scaling", None) == TEST_ROPE_SCALING
    )
    assert getattr(llama_model_config.hf_config, "rope_theta", None) == TEST_ROPE_THETA
    assert llama_model_config.max_model_len == 16384

    # LongChat without overrides.
    longchat_model_config = ModelConfig("lmsys/longchat-13b-16k")
    # Check if LONGCHAT_ROPE_SCALING entries are in longchat_model_config
    assert all(
        longchat_model_config.hf_config.rope_scaling.get(key) == value
        for key, value in LONGCHAT_ROPE_SCALING.items()
    )
    assert longchat_model_config.max_model_len == 16384

    # LongChat with its rope_scaling replaced by the test scaling.
    longchat_model_config = ModelConfig(
        "lmsys/longchat-13b-16k",
        hf_overrides={
            "rope_scaling": TEST_ROPE_SCALING,
        },
    )
    assert (
        getattr(longchat_model_config.hf_config, "rope_scaling", None)
        == TEST_ROPE_SCALING
    )
    assert longchat_model_config.max_model_len == 4096
293
294


295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def test_nested_hf_overrides():
    """Test that nested hf_overrides work correctly."""
    # This model has a text_config (and a vision_config) to override into.
    model_id = "Qwen/Qwen2-VL-2B-Instruct"

    # One nested section with a single overridden key.
    single_section = {"text_config": {"hidden_size": 1024}}
    model_config = ModelConfig(model_id, hf_overrides=single_section)
    assert model_config.hf_config.text_config.hidden_size == 1024

    # Two nested sections overridden in the same call.
    two_sections = {
        "text_config": {"hidden_size": 2048, "num_attention_heads": 16},
        "vision_config": {"hidden_size": 512},
    }
    model_config = ModelConfig(model_id, hf_overrides=two_sections)
    assert model_config.hf_config.text_config.hidden_size == 2048
    assert model_config.hf_config.text_config.num_attention_heads == 16
    assert model_config.hf_config.vision_config.hidden_size == 512


326
327
328
329
330
331
332
333
334
335
336
@pytest.mark.skipif(
    current_platform.is_rocm(), reason="Encoder Decoder models not supported on ROCm."
)
@pytest.mark.parametrize(
    ("model_id", "is_encoder_decoder"),
    [
        ("facebook/opt-125m", False),
        ("openai/whisper-tiny", True),
        ("meta-llama/Llama-3.2-1B-Instruct", False),
    ],
)
def test_is_encoder_decoder(model_id, is_encoder_decoder):
    """Check ``ModelConfig.is_encoder_decoder`` for each listed model."""
    config = ModelConfig(model_id)

    assert config.is_encoder_decoder == is_encoder_decoder


343
344
345
346
347
348
349
@pytest.mark.parametrize(
    ("model_id", "uses_mrope"),
    [
        ("facebook/opt-125m", False),
        ("Qwen/Qwen2-VL-2B-Instruct", True),
    ],
)
def test_uses_mrope(model_id, uses_mrope):
    """Check ``ModelConfig.uses_mrope`` for each listed model."""
    config = ModelConfig(model_id)

    assert config.uses_mrope == uses_mrope
354
355
356
357
358


def test_generation_config_loading():
    """Check ``get_diff_sampling_param`` for each combination of
    ``generation_config`` ("vllm" / "auto") and ``override_generation_config``
    (absent / present)."""
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"

    # When set generation_config to "vllm", the default generation config
    # will not be loaded.
    model_config = ModelConfig(model_id, generation_config="vllm")
    assert model_config.get_diff_sampling_param() == {}

    # When set generation_config to "auto", the default generation config
    # should be loaded.
    model_config = ModelConfig(model_id, generation_config="auto")

    correct_generation_config = {
        "repetition_penalty": 1.1,
        "temperature": 0.7,
        "top_p": 0.8,
        "top_k": 20,
    }

    assert model_config.get_diff_sampling_param() == correct_generation_config

    # The generation config could be overridden by the user.
    override_generation_config = {"temperature": 0.5, "top_k": 5}

    model_config = ModelConfig(
        model_id,
        generation_config="auto",
        override_generation_config=override_generation_config,
    )

    # Expected result: the loaded defaults with the user overrides on top.
    override_result = correct_generation_config.copy()
    override_result.update(override_generation_config)

    assert model_config.get_diff_sampling_param() == override_result

    # When generation_config is set to "vllm" and override_generation_config
    # is set, the override_generation_config should be used directly.
    model_config = ModelConfig(
        model_id,
        generation_config="vllm",
        override_generation_config=override_generation_config,
    )

    assert model_config.get_diff_sampling_param() == override_generation_config
400
401


402
403
404
405
406
407
408
@pytest.mark.parametrize(
    "pt_load_map_location",
    [
        "cuda",
        {"": "cuda"},
    ],
)
def test_load_config_pt_load_map_location(pt_load_map_location):
    """Check that ``pt_load_map_location`` given to ``LoadConfig`` is unchanged
    on the ``VllmConfig`` built from it, for the string and the dict form."""
    load_config = LoadConfig(pt_load_map_location=pt_load_map_location)
    config = VllmConfig(load_config=load_config)

    assert config.load_config.pt_load_map_location == pt_load_map_location
414
415
416


@pytest.mark.parametrize(
    ("model_id", "max_model_len", "expected_max_len", "should_raise"),
    [
        ("BAAI/bge-reranker-base", None, 512, False),
        ("BAAI/bge-reranker-base", 256, 256, False),
        ("BAAI/bge-reranker-base", 513, 512, True),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", None, 131072, False),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", 131073, 131072, True),
    ],
)
def test_get_and_verify_max_len(
    model_id, max_model_len, expected_max_len, should_raise
):
    """Test get_and_verify_max_len with different configurations.

    When ``should_raise`` is set, the call must raise ``ValueError`` and
    ``expected_max_len`` is not used; otherwise the returned length must
    equal ``expected_max_len``.
    """
    model_config = ModelConfig(model_id)

    if should_raise:
        with pytest.raises(ValueError):
            model_config.get_and_verify_max_len(max_model_len)
    else:
        actual_max_len = model_config.get_and_verify_max_len(max_model_len)
        assert actual_max_len == expected_max_len
438
439
440
441
442
443
444
445
446
447
448


class MockConfig:
    """Minimal stand-in object used by the tests of
    ``maybe_pull_model_tokenizer_for_runai``.

    It only carries the ``model``, ``tokenizer`` and ``model_weights``
    attributes.
    """

    def __init__(self, model: str, tokenizer: str) -> None:
        # model_weights always starts out as None.
        self.model, self.tokenizer, self.model_weights = model, tokenizer, None


449
450
451
452
453
454
455
456
@pytest.mark.parametrize(
    "s3_url",
    [
        "s3://example-bucket-1/model/",
        "s3://example-bucket-2/model/",
    ],
)
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url):
    """Test that S3 URLs create deterministic local directories for model and
    tokenizer."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    # Create first mock and run the method
    config1 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url)

    # Check that model and tokenizer point to existing directories
    assert os.path.exists(config1.model), (
        f"Model directory does not exist: {config1.model}"
    )
    assert os.path.isdir(config1.model), (
        f"Model path is not a directory: {config1.model}"
    )
    assert os.path.exists(config1.tokenizer), (
        f"Tokenizer directory does not exist: {config1.tokenizer}"
    )
    assert os.path.isdir(config1.tokenizer), (
        f"Tokenizer path is not a directory: {config1.tokenizer}"
    )

    # Verify that the paths are different from the original S3 URL
    assert config1.model != s3_url, "Model path should be converted to local directory"
    assert config1.tokenizer != s3_url, (
        "Tokenizer path should be converted to local directory"
    )

    # Store the original paths
    created_model_dir = config1.model
    created_tokenizer_dir = config1.tokenizer

    # Create a new mock and run the method with the same S3 URL
    config2 = MockConfig(model=s3_url, tokenizer=s3_url)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url)

    # Check that the new directories exist
    assert os.path.exists(config2.model), (
        f"Model directory does not exist: {config2.model}"
    )
    assert os.path.isdir(config2.model), (
        f"Model path is not a directory: {config2.model}"
    )
    assert os.path.exists(config2.tokenizer), (
        f"Tokenizer directory does not exist: {config2.tokenizer}"
    )
    assert os.path.isdir(config2.tokenizer), (
        f"Tokenizer path is not a directory: {config2.tokenizer}"
    )

    # Verify that the paths are deterministic (same as before)
    assert config2.model == created_model_dir, (
        f"Model paths are not deterministic. "
        f"Original: {created_model_dir}, New: {config2.model}"
    )
    assert config2.tokenizer == created_tokenizer_dir, (
        f"Tokenizer paths are not deterministic. "
        f"Original: {created_tokenizer_dir}, New: {config2.tokenizer}"
    )
518
519


520
@patch("vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files")
def test_s3_url_different_models_create_different_directories(mock_pull_files):
    """Test that different S3 URLs create different local directories."""
    # Mock pull_files to avoid actually downloading files during tests
    mock_pull_files.return_value = None

    s3_url1 = "s3://example-bucket-1/model/"
    s3_url2 = "s3://example-bucket-2/model/"

    # Create mocks with different S3 URLs and run the method
    config1 = MockConfig(model=s3_url1, tokenizer=s3_url1)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1)

    config2 = MockConfig(model=s3_url2, tokenizer=s3_url2)
    ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2)

    # Verify that different URLs produce different directories
    assert config1.model != config2.model, (
        f"Different S3 URLs should create different model directories. "
        f"URL1 model: {config1.model}, URL2 model: {config2.model}"
    )
    assert config1.tokenizer != config2.tokenizer, (
        f"Different S3 URLs should create different tokenizer directories. "
        f"URL1 tokenizer: {config1.tokenizer}, "
        f"URL2 tokenizer: {config2.tokenizer}"
    )

    # Verify that both sets of directories exist. os.path.isdir() is False
    # for a missing path, so it covers the existence check as well.
    local_paths = (
        config1.model,
        config1.tokenizer,
        config2.model,
        config2.tokenizer,
    )
    for local_path in local_paths:
        assert os.path.isdir(local_path)