Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4141608c
"vscode:/vscode.git/clone" did not exist on "2abbd351ef6436860cb3c64f66b1452c1d941fe4"
Unverified
Commit
4141608c
authored
Oct 15, 2024
by
Kunshang Ji
Committed by
GitHub
Oct 14, 2024
Browse files
[Hardware][intel GPU] add async output process for xpu (#8897)
parent
dfe43a20
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
4 deletions
+8
-4
vllm/config.py
vllm/config.py
+2
-2
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+6
-2
No files found.
vllm/config.py
View file @
4141608c
...
...
@@ -361,9 +361,9 @@ class ModelConfig:
         # Reminder: Please update docs/source/serving/compatibility_matrix.rst
         # If the feature combo become valid
-        if device_config.device_type not in ("cuda", "tpu"):
+        if device_config.device_type not in ("cuda", "tpu", "xpu"):
             logger.warning(
-                "Async output processing is only supported for CUDA or TPU. "
+                "Async output processing is only supported for CUDA, TPU, XPU. "
                 "Disabling it for other platforms.")
             self.use_async_output_proc = False
             return
...
...
vllm/worker/xpu_model_runner.py
View file @
4141608c
...
...
@@ -2,8 +2,8 @@ import dataclasses
 import time
 import weakref
 from dataclasses import dataclass
-from typing import (TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type,
-                    TypeVar)
+from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple,
+                    Type, TypeVar)
 
 import torch
 import torch.nn as nn
...
...
@@ -57,6 +57,7 @@ class ModelInputForXPU(ModelRunnerInputBase):
     virtual_engine: Optional[int] = None
     seq_lens: Optional[List[int]] = None
     query_lens: Optional[List[int]] = None
+    async_callback: Optional[Callable] = None
 
     def as_broadcastable_tensor_dict(self) -> Dict[str, Any]:
         tensor_dict = {
...
...
@@ -582,6 +583,9 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
         if not self.is_driver_worker:
             return []
 
+        if model_input.async_callback is not None:
+            model_input.async_callback()
+
         # Sample the next token.
         output: SamplerOutput = self.model.sample(
             logits=logits,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment