Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
80b57f00
Unverified
Commit
80b57f00
authored
Oct 08, 2024
by
Kunshang Ji
Committed by
GitHub
Oct 08, 2024
Browse files
[Intel GPU] Fix xpu decode input (#9145)
parent
04c12f81
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
7 deletions
+14
-7
vllm/worker/xpu_model_runner.py
vllm/worker/xpu_model_runner.py
+14
-7
No files found.
vllm/worker/xpu_model_runner.py
View file @
80b57f00
...
@@ -15,6 +15,7 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
...
@@ -15,6 +15,7 @@ from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
from
vllm.distributed
import
get_pp_group
from
vllm.distributed
import
get_pp_group
from
vllm.inputs
import
INPUT_REGISTRY
,
InputRegistry
from
vllm.inputs
import
INPUT_REGISTRY
,
InputRegistry
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor
import
SamplingMetadataCache
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.model_loader
import
get_model
from
vllm.model_executor.model_loader
import
get_model
from
vllm.multimodal
import
(
MULTIMODAL_REGISTRY
,
BatchedTensorInputs
,
from
vllm.multimodal
import
(
MULTIMODAL_REGISTRY
,
BatchedTensorInputs
,
...
@@ -136,7 +137,7 @@ class ModelInputForXPUBuilder(ModelRunnerInputBuilderBase[ModelInputForXPU]):
...
@@ -136,7 +137,7 @@ class ModelInputForXPUBuilder(ModelRunnerInputBuilderBase[ModelInputForXPU]):
(
input_tokens
,
input_positions
,
(
input_tokens
,
input_positions
,
attn_metadata
)
=
self
.
_prepare_decode
(
attn_metadata
)
=
self
.
_prepare_decode
(
self
.
seq_group_metadata_list
)
self
.
seq_group_metadata_list
)
seq_lens
=
[]
seq_lens
=
None
multi_modal_kwargs
=
None
multi_modal_kwargs
=
None
return
self
.
model_input_cls
(
return
self
.
model_input_cls
(
...
@@ -390,6 +391,10 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
...
@@ -390,6 +391,10 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
# Lazy initialization.
# Lazy initialization.
self
.
model
:
nn
.
Module
# Set after init_Model
self
.
model
:
nn
.
Module
# Set after init_Model
self
.
sampling_metadata_cache
:
SamplingMetadataCache
=
\
SamplingMetadataCache
()
\
if
self
.
parallel_config
.
pipeline_parallel_size
==
1
else
None
def
load_model
(
self
)
->
None
:
def
load_model
(
self
)
->
None
:
with
DeviceMemoryProfiler
()
as
m
:
with
DeviceMemoryProfiler
()
as
m
:
self
.
model
=
get_model
(
self
.
model
=
get_model
(
...
@@ -524,12 +529,14 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
...
@@ -524,12 +529,14 @@ class XPUModelRunner(ModelRunnerBase[ModelInputForXPUWithSamplingMetadata]):
seq_group_metadata_list
,
finished_requests_ids
)
seq_group_metadata_list
,
finished_requests_ids
)
# Sampling metadata is only required for the final pp group
# Sampling metadata is only required for the final pp group
generators
=
self
.
get_generators
(
finished_requests_ids
)
generators
=
self
.
get_generators
(
finished_requests_ids
)
sampling_metadata
=
SamplingMetadata
.
prepare
(
seq_group_metadata_list
,
sampling_metadata
=
SamplingMetadata
.
prepare
(
model_input
.
seq_lens
,
seq_group_metadata_list
,
model_input
.
query_lens
,
model_input
.
seq_lens
,
self
.
device
,
model_input
.
query_lens
,
pin_memory
=
False
,
self
.
device
,
generators
=
generators
)
pin_memory
=
False
,
generators
=
generators
,
cache
=
self
.
sampling_metadata_cache
)
return
dataclasses
.
replace
(
model_input
,
return
dataclasses
.
replace
(
model_input
,
sampling_metadata
=
sampling_metadata
,
sampling_metadata
=
sampling_metadata
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment