Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5c2e66e4
Unverified
Commit
5c2e66e4
authored
Apr 12, 2024
by
Dylan Hawk
Committed by
GitHub
Apr 12, 2024
Browse files
[Bugfix] More type hint fixes for py 3.8 (#4039)
parent
546e7211
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
8 deletions
+8
-8
vllm/executor/executor_base.py
vllm/executor/executor_base.py
+1
-1
vllm/worker/cpu_worker.py
vllm/worker/cpu_worker.py
+2
-2
vllm/worker/neuron_worker.py
vllm/worker/neuron_worker.py
+2
-2
vllm/worker/worker_base.py
vllm/worker/worker_base.py
+3
-3
No files found.
vllm/executor/executor_base.py
View file @
5c2e66e4
...
@@ -39,7 +39,7 @@ class ExecutorBase(ABC):
...
@@ -39,7 +39,7 @@ class ExecutorBase(ABC):
ExecutorBase may require modification of the result, e.g. to ensure the
ExecutorBase may require modification of the result, e.g. to ensure the
selected cache sizes are compatible with all workers.
selected cache sizes are compatible with all workers.
Returns a
t
uple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
Returns a
T
uple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
are blocks that are "active" on the device and can be appended to.
are blocks that are "active" on the device and can be appended to.
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
appended to.
appended to.
...
...
vllm/worker/cpu_worker.py
View file @
5c2e66e4
"""A CPU worker class."""
"""A CPU worker class."""
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Dict
,
List
,
Optional
,
Tuple
import
torch
import
torch
import
torch.distributed
import
torch.distributed
...
@@ -157,7 +157,7 @@ class CPUWorker(LoraNotSupportedWorkerBase):
...
@@ -157,7 +157,7 @@ class CPUWorker(LoraNotSupportedWorkerBase):
def
load_model
(
self
):
def
load_model
(
self
):
self
.
model_runner
.
load_model
()
self
.
model_runner
.
load_model
()
def
determine_num_available_blocks
(
self
)
->
t
uple
[
int
,
int
]:
def
determine_num_available_blocks
(
self
)
->
T
uple
[
int
,
int
]:
"""Determine the number of blocks available for the KV cache.
"""Determine the number of blocks available for the KV cache.
This determines how many KV blocks can fit into the configured CPU
This determines how many KV blocks can fit into the configured CPU
...
...
vllm/worker/neuron_worker.py
View file @
5c2e66e4
"""A Neuron worker class."""
"""A Neuron worker class."""
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
,
Tuple
import
torch
import
torch
import
torch.distributed
import
torch.distributed
...
@@ -40,7 +40,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase):
...
@@ -40,7 +40,7 @@ class NeuronWorker(LoraNotSupportedWorkerBase):
def
load_model
(
self
):
def
load_model
(
self
):
self
.
model_runner
.
load_model
()
self
.
model_runner
.
load_model
()
def
determine_num_available_blocks
(
self
)
->
t
uple
[
int
,
int
]:
def
determine_num_available_blocks
(
self
)
->
T
uple
[
int
,
int
]:
"""Determine the number of available KV blocks.
"""Determine the number of available KV blocks.
Swapping is not yet supported, so always return num_cpu_blocks=0.
Swapping is not yet supported, so always return num_cpu_blocks=0.
...
...
vllm/worker/worker_base.py
View file @
5c2e66e4
from
abc
import
ABC
,
abstractmethod
from
abc
import
ABC
,
abstractmethod
from
typing
import
Dict
,
List
from
typing
import
Dict
,
List
,
Tuple
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
from
vllm.sequence
import
SamplerOutput
,
SequenceGroupMetadata
...
@@ -18,14 +18,14 @@ class WorkerBase(ABC):
...
@@ -18,14 +18,14 @@ class WorkerBase(ABC):
raise
NotImplementedError
raise
NotImplementedError
@
abstractmethod
@
abstractmethod
def
determine_num_available_blocks
(
self
)
->
t
uple
[
int
,
int
]:
def
determine_num_available_blocks
(
self
)
->
T
uple
[
int
,
int
]:
"""Determine the number of available blocks for the GPU KV cache and
"""Determine the number of available blocks for the GPU KV cache and
swappable CPU KV cache.
swappable CPU KV cache.
The implementation may run profiling or other heuristics to determine
The implementation may run profiling or other heuristics to determine
the size of caches.
the size of caches.
Returns a
t
uple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
Returns a
T
uple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks
are blocks that are "active" on the device and can be appended to.
are blocks that are "active" on the device and can be appended to.
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be
appended to.
appended to.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment