Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad932a22
Unverified
Commit
ad932a22
authored
May 08, 2024
by
Antoni Baum
Committed by
GitHub
May 08, 2024
Browse files
[Core] Faster startup for LoRA enabled models (#4634)
parent
5510cf0e
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
18 deletions
+47
-18
vllm/lora/models.py
vllm/lora/models.py
+10
-0
vllm/lora/worker_manager.py
vllm/lora/worker_manager.py
+22
-4
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+15
-14
No files found.
vllm/lora/models.py
View file @
ad932a22
...
...
@@ -119,6 +119,16 @@ class LoRAModel:
self
.
rank
=
rank
self
.
loras
:
Dict
[
str
,
LoRALayerWeights
]
=
loras
def clone(self, lora_model_id: int) -> "LoRAModel":
    """Build a sibling model registered under ``lora_model_id``.

    The rank is carried over unchanged. The ``loras`` mapping is
    shallow-copied: the new object gets its own dict, but the values
    stored in it are the very same objects held by this instance.
    """
    model_cls = self.__class__
    return model_cls(
        lora_model_id,
        rank=self.rank,
        loras=self.loras.copy(),
    )
@
property
def
extra_vocab_size
(
self
)
->
int
:
return
max
(
lora
.
extra_vocab_size
...
...
vllm/lora/worker_manager.py
View file @
ad932a22
from
abc
import
ABC
,
abstractmethod
,
abstractproperty
from
typing
import
Any
,
Dict
,
List
,
Set
,
Type
from
contextlib
import
contextmanager
from
typing
import
Any
,
Dict
,
List
,
Literal
,
Set
,
Type
,
Union
import
torch
...
...
@@ -25,6 +26,17 @@ class AbstractWorkerLoRAManager(ABC):
self
.
device
=
device
self
.
lora_config
=
lora_config
# If False, do not cache. If None, cache is empty.
self
.
_cached_dummy_lora
:
Union
[
None
,
Literal
[
False
],
LoRAModel
]
=
False
@contextmanager
def dummy_lora_cache(self):
    """Use this context manager to reuse the dummy lora model
    to avoid creating it repeatedly.

    On entry ``_cached_dummy_lora`` is set to ``None`` (caching enabled,
    cache empty). On exit it is set back to ``False`` (caching disabled),
    which also drops the reference to any model cached in between.
    """
    self._cached_dummy_lora = None
    try:
        yield
    finally:
        # Reset even when the body raises; otherwise caching would stay
        # switched on and the cached dummy model would remain referenced
        # after the ``with`` block has been left.
        self._cached_dummy_lora = False
@
abstractproperty
def
is_enabled
(
self
)
->
bool
:
...
...
...
@@ -174,9 +186,15 @@ class WorkerLoRAManager(AbstractWorkerLoRAManager):
def add_dummy_lora(self, lora_request: LoRARequest, rank: int) -> bool:
    """Register a dummy LoRA for ``lora_request`` with the LoRA manager.

    Returns ``False`` without doing anything when the request's
    ``lora_int_id`` is already in ``self.list_loras()``; otherwise
    returns whatever ``self._lora_manager.add_lora`` returns.

    ``self._cached_dummy_lora`` selects how the dummy model is obtained:

    * a ``LoRAModel`` instance: it is cloned under the new id instead of
      building a fresh model;
    * ``None``: a fresh model is created and stored as the cache;
    * ``False``: a fresh model is created and nothing is cached.
    """
    if lora_request.lora_int_id in self.list_loras():
        return False

    if isinstance(self._cached_dummy_lora, LoRAModel):
        # NOTE(review): the clone keeps the cached model's rank and does
        # not consult ``rank`` -- confirm callers use one rank while the
        # cache is active.
        dummy_lora = self._cached_dummy_lora.clone(
            lora_request.lora_int_id)
    else:
        dummy_lora = self._lora_manager.create_dummy_lora(
            lora_request.lora_int_id, rank, self.embedding_modules)
        # Only populate the cache when caching is enabled (``None``);
        # ``False`` means "do not cache".
        if self._cached_dummy_lora is None:
            self._cached_dummy_lora = dummy_lora

    return self._lora_manager.add_lora(dummy_lora)
def
add_lora
(
self
,
lora_request
:
LoRARequest
)
->
bool
:
if
lora_request
.
lora_int_id
in
self
.
list_loras
():
...
...
vllm/worker/model_runner.py
View file @
ad932a22
...
...
@@ -835,6 +835,7 @@ class ModelRunner:
dummy_lora_requests
=
[]
dummy_lora_requests_per_seq
=
[]
if
self
.
lora_config
:
with
self
.
lora_manager
.
dummy_lora_cache
():
for
idx
in
range
(
self
.
lora_config
.
max_loras
):
lora_id
=
idx
+
1
dummy_lora_request
=
LoRARequest
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment