# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
import random
import tempfile
from unittest.mock import patch

from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                         ModelConfig, ParallelConfig, SchedulerConfig,
                         VllmConfig)
from vllm.lora.models import LoRAMapping
from vllm.lora.request import LoRARequest

from vllm.v1.worker.gpu_worker import Worker
from ..utils import models_path_prefix

# Number of LoRA requests the test creates; also used as the worker's
# ``max_loras`` / ``max_cpu_loras`` limits and as the number of randomized
# activation rounds.
NUM_LORAS = 16
19
20


21
22
@patch.dict(os.environ, {"RANK": "0"})
def test_worker_apply_lora(sql_lora_files):
    """Check that a worker reports the LoRA adapters it was asked to activate.

    The test builds a single-GPU ``Worker`` with LoRA enabled, then:

    1. activates no adapters and expects ``list_loras()`` to be empty;
    2. activates ``NUM_LORAS`` adapters and expects exactly their ids;
    3. for ``NUM_LORAS`` rounds, activates a seeded random selection of the
       requests and expects every selected id to be listed.

    Args:
        sql_lora_files: Passed as the third positional argument of every
            ``LoRARequest``. NOTE(review): presumably a pytest fixture
            yielding the path of a LoRA adapter checkpoint -- confirm in
            conftest.
    """

    def set_active_loras(worker: Worker, lora_requests: list[LoRARequest]):
        """Activate ``lora_requests`` on ``worker`` using an empty mapping."""
        lora_mapping = LoRAMapping([], [])
        worker.model_runner.lora_manager.set_active_adapters(
            lora_requests, lora_mapping)

    vllm_config = VllmConfig(
        model_config=ModelConfig(
            os.path.join(models_path_prefix, "meta-llama/Llama-2-7b-hf"),
            seed=0,
            dtype="float16",
            enforce_eager=True,
        ),
        load_config=LoadConfig(
            download_dir=None,
            load_format="dummy",
        ),
        parallel_config=ParallelConfig(
            pipeline_parallel_size=1,
            tensor_parallel_size=1,
            data_parallel_size=1,
        ),
        scheduler_config=SchedulerConfig("generate", 32, 32, 32),
        device_config=DeviceConfig("cuda"),
        cache_config=CacheConfig(
            block_size=16,
            swap_space=0,
            cache_dtype="auto",
        ),
        lora_config=LoRAConfig(max_lora_rank=8,
                               max_cpu_loras=NUM_LORAS,
                               max_loras=NUM_LORAS),
    )

    # mkstemp() returns an *open* file descriptor together with the path.
    # Only the path is needed for the file:// rendezvous, so close the
    # descriptor right away instead of leaking it.
    init_fd, init_path = tempfile.mkstemp()
    os.close(init_fd)

    worker = Worker(
        vllm_config=vllm_config,
        local_rank=0,
        rank=0,
        distributed_init_method=f"file://{init_path}",
    )

    worker.init_device()
    worker.load_model()

    set_active_loras(worker, [])
    assert worker.list_loras() == set()

    lora_requests = [
        LoRARequest(str(i + 1), i + 1, sql_lora_files)
        for i in range(NUM_LORAS)
    ]

    set_active_loras(worker, lora_requests)
    assert worker.list_loras() == {
        lora_request.lora_int_id
        for lora_request in lora_requests
    }

    for i in range(NUM_LORAS):
        # A local generator gives the same seeded draws as random.seed(i)
        # without mutating the process-wide RNG state.
        rng = random.Random(i)
        iter_lora_requests = rng.choices(lora_requests,
                                         k=rng.randint(1, NUM_LORAS))
        rng.shuffle(iter_lora_requests)

        # Drop a random number of trailing requests. Compute the length to
        # keep explicitly: ``xs[:-0]`` is ``xs[:0]`` (empty), so a negative
        # slice bound would discard everything when the draw is 0.
        num_dropped = rng.randint(0, NUM_LORAS)
        num_kept = max(0, len(iter_lora_requests) - num_dropped)
        iter_lora_requests = iter_lora_requests[:num_kept]

        # Activate the sampled selection (not the full list), so the
        # assertion below actually exercises the randomized path.
        set_active_loras(worker, iter_lora_requests)
        assert worker.list_loras().issuperset(
            {lora_request.lora_int_id
             for lora_request in iter_lora_requests})