Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ba4be44c
Unverified
Commit
ba4be44c
authored
Apr 27, 2024
by
Nick Hill
Committed by
GitHub
Apr 27, 2024
Browse files
[BugFix] Fix return type of executor execute_model methods (#4402)
parent
d6e520e1
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
9 additions
and
8 deletions
+9
-8
vllm/executor/cpu_executor.py
vllm/executor/cpu_executor.py
+1
-1
vllm/executor/distributed_gpu_executor.py
vllm/executor/distributed_gpu_executor.py
+4
-3
vllm/executor/executor_base.py
vllm/executor/executor_base.py
+1
-1
vllm/executor/gpu_executor.py
vllm/executor/gpu_executor.py
+1
-1
vllm/executor/neuron_executor.py
vllm/executor/neuron_executor.py
+1
-1
vllm/executor/ray_gpu_executor.py
vllm/executor/ray_gpu_executor.py
+1
-1
No files found.
vllm/executor/cpu_executor.py
View file @
ba4be44c
...
@@ -109,7 +109,7 @@ class CPUExecutorAsync(CPUExecutor, ExecutorAsyncBase):
...
@@ -109,7 +109,7 @@ class CPUExecutorAsync(CPUExecutor, ExecutorAsyncBase):
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
)
->
SamplerOutput
:
)
->
List
[
SamplerOutput
]
:
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
seq_group_metadata_list
=
seq_group_metadata_list
,
seq_group_metadata_list
=
seq_group_metadata_list
,
blocks_to_swap_in
=
blocks_to_swap_in
,
blocks_to_swap_in
=
blocks_to_swap_in
,
...
...
vllm/executor/distributed_gpu_executor.py
View file @
ba4be44c
from
abc
import
abstractmethod
from
abc
import
abstractmethod
from
typing
import
Any
,
Dict
,
Optional
,
Set
,
Tuple
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Set
,
Tuple
from
vllm.executor.executor_base
import
ExecutorAsyncBase
from
vllm.executor.executor_base
import
ExecutorAsyncBase
from
vllm.executor.gpu_executor
import
GPUExecutor
from
vllm.executor.gpu_executor
import
GPUExecutor
...
@@ -52,7 +52,7 @@ class DistributedGPUExecutor(GPUExecutor):
...
@@ -52,7 +52,7 @@ class DistributedGPUExecutor(GPUExecutor):
num_gpu_blocks
=
num_gpu_blocks
,
num_gpu_blocks
=
num_gpu_blocks
,
num_cpu_blocks
=
num_cpu_blocks
)
num_cpu_blocks
=
num_cpu_blocks
)
def
execute_model
(
self
,
*
args
,
**
kwargs
)
->
SamplerOutput
:
def
execute_model
(
self
,
*
args
,
**
kwargs
)
->
List
[
SamplerOutput
]
:
all_outputs
=
self
.
_run_workers
(
"execute_model"
,
all_outputs
=
self
.
_run_workers
(
"execute_model"
,
driver_args
=
args
,
driver_args
=
args
,
driver_kwargs
=
kwargs
)
driver_kwargs
=
kwargs
)
...
@@ -105,7 +105,8 @@ class DistributedGPUExecutorAsync(DistributedGPUExecutor, ExecutorAsyncBase):
...
@@ -105,7 +105,8 @@ class DistributedGPUExecutorAsync(DistributedGPUExecutor, ExecutorAsyncBase):
"""Runs the given method on all workers."""
"""Runs the given method on all workers."""
raise
NotImplementedError
raise
NotImplementedError
async
def
execute_model_async
(
self
,
*
args
,
**
kwargs
)
->
SamplerOutput
:
async
def
execute_model_async
(
self
,
*
args
,
**
kwargs
)
->
List
[
SamplerOutput
]:
all_outputs
=
await
self
.
_run_workers_async
(
"execute_model"
,
all_outputs
=
await
self
.
_run_workers_async
(
"execute_model"
,
driver_args
=
args
,
driver_args
=
args
,
driver_kwargs
=
kwargs
)
driver_kwargs
=
kwargs
)
...
...
vllm/executor/executor_base.py
View file @
ba4be44c
...
@@ -112,7 +112,7 @@ class ExecutorAsyncBase(ExecutorBase):
...
@@ -112,7 +112,7 @@ class ExecutorAsyncBase(ExecutorBase):
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
)
->
SamplerOutput
:
)
->
List
[
SamplerOutput
]
:
"""Executes one model step on the given sequences."""
"""Executes one model step on the given sequences."""
raise
NotImplementedError
raise
NotImplementedError
...
...
vllm/executor/gpu_executor.py
View file @
ba4be44c
...
@@ -163,7 +163,7 @@ class GPUExecutorAsync(GPUExecutor, ExecutorAsyncBase):
...
@@ -163,7 +163,7 @@ class GPUExecutorAsync(GPUExecutor, ExecutorAsyncBase):
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
)
->
SamplerOutput
:
)
->
List
[
SamplerOutput
]
:
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
seq_group_metadata_list
=
seq_group_metadata_list
,
seq_group_metadata_list
=
seq_group_metadata_list
,
blocks_to_swap_in
=
blocks_to_swap_in
,
blocks_to_swap_in
=
blocks_to_swap_in
,
...
...
vllm/executor/neuron_executor.py
View file @
ba4be44c
...
@@ -84,7 +84,7 @@ class NeuronExecutorAsync(NeuronExecutor, ExecutorAsyncBase):
...
@@ -84,7 +84,7 @@ class NeuronExecutorAsync(NeuronExecutor, ExecutorAsyncBase):
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
)
->
SamplerOutput
:
)
->
List
[
SamplerOutput
]
:
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
output
=
await
make_async
(
self
.
driver_worker
.
execute_model
)(
seq_group_metadata_list
=
seq_group_metadata_list
,
)
seq_group_metadata_list
=
seq_group_metadata_list
,
)
return
output
return
output
...
...
vllm/executor/ray_gpu_executor.py
View file @
ba4be44c
...
@@ -188,7 +188,7 @@ class RayGPUExecutor(DistributedGPUExecutor):
...
@@ -188,7 +188,7 @@ class RayGPUExecutor(DistributedGPUExecutor):
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_in
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_swap_out
:
Dict
[
int
,
int
],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
blocks_to_copy
:
Dict
[
int
,
List
[
int
]],
num_lookahead_slots
:
int
=
0
)
->
SamplerOutput
:
num_lookahead_slots
:
int
=
0
)
->
List
[
SamplerOutput
]
:
all_outputs
=
self
.
_run_workers
(
all_outputs
=
self
.
_run_workers
(
"execute_model"
,
"execute_model"
,
driver_kwargs
=
{
driver_kwargs
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment