"vscode:/vscode.git/clone" did not exist on "beb16b2c810a87b28e7b8a7aa29d26f842f654b9"
utils.py 3.25 KB
Newer Older
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

4
from typing import Any, Callable, Optional
5
6
7


## model functions
8
def deactivate_adapter(adapter_id: int, active_adapters: dict[int, None],
                       deactivate_func: Callable) -> bool:
    """Deactivate an adapter if it is currently active.

    Args:
        adapter_id: Identifier of the adapter to deactivate.
        active_adapters: Mapping keyed by the ids of the active adapters.
            The entry for ``adapter_id`` is removed in place.
        deactivate_func: Callback invoked with ``adapter_id`` before the
            entry is dropped from ``active_adapters``.

    Returns:
        True if ``adapter_id`` was in ``active_adapters`` and has been
        deactivated; False otherwise (the callback is not invoked).
    """
    if adapter_id not in active_adapters:
        return False
    deactivate_func(adapter_id)
    # The callback may already have dropped the entry itself; tolerate that
    # rather than raising KeyError after a successful deactivation.
    active_adapters.pop(adapter_id, None)
    return True


17
def add_adapter(adapter: Any, registered_adapters: dict[int, Any],
                capacity: int, add_func: Callable) -> bool:
    """Register an adapter unless one with the same id is already present.

    Args:
        adapter: Object exposing an ``id`` attribute used as registry key.
        registered_adapters: Registry mapping adapter id to adapter; it is
            updated in place on success.
        capacity: Maximum number of entries allowed in the registry.
        add_func: Callback invoked with ``adapter`` before it is stored.

    Returns:
        True if the adapter was newly registered, False if its id was
        already present (nothing is called or modified in that case).

    Raises:
        RuntimeError: If the adapter is new but the registry already holds
            ``capacity`` (or more) entries.
    """
    if adapter.id in registered_adapters:
        return False
    if len(registered_adapters) >= capacity:
        raise RuntimeError('No free adapter slots.')
    # Run the callback first so a failure there leaves the registry as-is.
    add_func(adapter)
    registered_adapters[adapter.id] = adapter
    return True


def set_adapter_mapping(mapping: Any, last_mapping: Any,
                        set_mapping_func: Callable) -> Any:
    """Apply ``mapping`` only when it differs from the previous one.

    Args:
        mapping: The mapping requested for the current step.
        last_mapping: The mapping that was applied most recently.
        set_mapping_func: Callback invoked with ``mapping`` when it compares
            unequal (``!=``) to ``last_mapping``.

    Returns:
        ``mapping`` if it was applied, otherwise ``last_mapping`` unchanged.
    """
    unchanged = not (last_mapping != mapping)
    if unchanged:
        # Nothing to do: keep handing back the object already in effect.
        return last_mapping
    set_mapping_func(mapping)
    return mapping


36
def remove_adapter(adapter_id: int, registered_adapters: dict[int, Any],
                   deactivate_func: Callable) -> bool:
    """Deactivate an adapter and drop it from the registry.

    Args:
        adapter_id: Identifier of the adapter to remove.
        registered_adapters: Registry mapping adapter id to adapter; the
            entry for ``adapter_id`` is removed in place if present.
        deactivate_func: Callback invoked with ``adapter_id``; it is always
            called, whether or not the adapter is registered.

    Returns:
        True if an entry for ``adapter_id`` was present and removed,
        False otherwise.
    """
    deactivate_func(adapter_id)
    # Use a private sentinel so that a registered adapter whose value is
    # falsy (e.g. an empty container) is still reported as removed.
    missing = object()
    return registered_adapters.pop(adapter_id, missing) is not missing


42
def list_adapters(registered_adapters: dict[int, Any]) -> dict[int, Any]:
    """Return a shallow copy of the adapter registry.

    Args:
        registered_adapters: Registry mapping adapter id to adapter.

    Returns:
        A new ``dict`` with the same items, so callers can mutate the
        result without affecting the registry itself.
    """
    return dict(registered_adapters)


def get_adapter(adapter_id: int,
                registered_adapters: dict[int, Any]) -> Optional[Any]:
    """Look up a registered adapter by id.

    Args:
        adapter_id: Identifier of the adapter to fetch.
        registered_adapters: Registry mapping adapter id to adapter.

    Returns:
        The registered adapter, or None if ``adapter_id`` is not present.
    """
    return registered_adapters.get(adapter_id)
49
50
51


## worker functions
52
def set_active_adapters_worker(requests: set[Any], mapping: Optional[Any],
                               apply_adapters_func: Callable,
                               set_adapter_mapping_func: Callable) -> None:
    """Apply the requested adapters, then install the adapter mapping.

    Args:
        requests: Adapter requests, forwarded to ``apply_adapters_func``.
        mapping: Mapping forwarded to ``set_adapter_mapping_func``; passed
            through as-is, including when it is None.
        apply_adapters_func: Callback invoked first, with ``requests``.
        set_adapter_mapping_func: Callback invoked second, with ``mapping``.
    """
    # Order matters to callers: adapters are applied before the mapping.
    apply_adapters_func(requests)
    set_adapter_mapping_func(mapping)


def add_adapter_worker(adapter_request: Any, list_adapters_func,
                       load_adapter_func, add_adapter_func,
                       activate_adapter_func) -> bool:
    """Load, register and activate the adapter described by a request.

    Args:
        adapter_request: Request object exposing ``adapter_id``.
        list_adapters_func: Zero-argument callback returning a container of
            the adapter ids that are already present.
        load_adapter_func: Callback turning the request into a loaded
            adapter (an object exposing ``id``).
        add_adapter_func: Callback registering the loaded adapter; its
            return value becomes this function's result.
        activate_adapter_func: Callback invoked with the loaded adapter's
            ``id`` after registration.

    Returns:
        False if the requested id is already listed (no callback other than
        ``list_adapters_func`` runs); otherwise whatever
        ``add_adapter_func`` returned.
    """
    known_ids = list_adapters_func()
    already_present = adapter_request.adapter_id in known_ids
    if not already_present:
        adapter = load_adapter_func(adapter_request)
        was_added = add_adapter_func(adapter)
        # Activation happens regardless of what the add callback reported.
        activate_adapter_func(adapter.id)
        return was_added
    return False


70
def apply_adapters_worker(adapter_requests: set[Any], list_adapters_func,
                          adapter_slots: int, remove_adapter_func,
                          add_adapter_func) -> None:
    """Reconcile the present adapters with the requested ones.

    Adapters that are present but no longer requested are removed first;
    requested adapters that are not yet present are then added.

    Args:
        adapter_requests: Requests exposing ``adapter_id``; falsy entries
            (such as None) are ignored.
        list_adapters_func: Zero-argument callback returning an iterable of
            the adapter ids currently present.
        adapter_slots: Maximum number of distinct adapters that may be
            requested at once.
        remove_adapter_func: Callback invoked with each id to remove.
        add_adapter_func: Callback invoked with each request to add.

    Raises:
        RuntimeError: If more distinct adapters are requested than
            ``adapter_slots`` allows; nothing is added or removed then.
    """
    # Coerce to a set so any iterable of ids (set, list, dict keyed by id)
    # works with the set arithmetic below instead of raising TypeError.
    models_that_exist = set(list_adapters_func())
    models_map = {
        adapter_request.adapter_id: adapter_request
        for adapter_request in adapter_requests if adapter_request
    }
    if len(models_map) > adapter_slots:
        raise RuntimeError(
            f"Number of requested models ({len(models_map)}) is greater "
            f"than the number of GPU model slots "
            f"({adapter_slots}).")
    new_models = set(models_map)
    models_to_add = new_models - models_that_exist
    models_to_remove = models_that_exist - new_models
    # Remove before adding so freed slots are available to the newcomers.
    for adapter_id in models_to_remove:
        remove_adapter_func(adapter_id)
    for adapter_id in models_to_add:
        add_adapter_func(models_map[adapter_id])


92
def list_adapters_worker(adapter_manager_list_adapters_func) -> set[int]:
    """Return the ids reported by the adapter manager as a set.

    Args:
        adapter_manager_list_adapters_func: Zero-argument callback returning
            an iterable of adapter ids (iterating a dict yields its keys).

    Returns:
        A new set built from the callback's result.
    """
    return set(adapter_manager_list_adapters_func())