Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2cebda42
Unverified
Commit
2cebda42
authored
Nov 11, 2024
by
Isotr0py
Committed by
GitHub
Nov 11, 2024
Browse files
[Bugfix][Hardware][CPU] Fix broken encoder-decoder CPU runner (#10218)
Signed-off-by:
Isotr0py
<
2037008807@qq.com
>
parent
5fb1f935
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
0 deletions
+16
-0
.buildkite/run-cpu-test-ppc64le.sh
.buildkite/run-cpu-test-ppc64le.sh
+2
-0
.buildkite/run-cpu-test.sh
.buildkite/run-cpu-test.sh
+2
-0
vllm/worker/cpu_embedding_model_runner.py
vllm/worker/cpu_embedding_model_runner.py
+1
-0
vllm/worker/cpu_enc_dec_model_runner.py
vllm/worker/cpu_enc_dec_model_runner.py
+11
-0
No files found.
.buildkite/run-cpu-test-ppc64le.sh
View file @
2cebda42
...
...
@@ -18,6 +18,8 @@ source /etc/environment
docker run
-itd
--entrypoint
/bin/bash
-v
~/.cache/huggingface:/root/.cache/huggingface
--privileged
=
true
--network
host
-e
HF_TOKEN
=
"
$HF_TOKEN
"
--name
cpu-test cpu-test
function
cpu_tests
()
{
set
-e
# Run basic model test
docker
exec
cpu-test bash
-c
"
set -e
...
...
.buildkite/run-cpu-test.sh
View file @
2cebda42
...
...
@@ -20,6 +20,8 @@ docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/hugg
--cpuset-mems
=
1
--privileged
=
true
--network
host
-e
HF_TOKEN
--env
VLLM_CPU_KVCACHE_SPACE
=
4
--shm-size
=
4g
--name
cpu-test-avx2 cpu-test-avx2
function
cpu_tests
()
{
set
-e
# offline inference
docker
exec
cpu-test-avx2 bash
-c
"
set -e
...
...
vllm/worker/cpu_embedding_model_runner.py
View file @
2cebda42
...
...
@@ -95,6 +95,7 @@ class CPUEmbeddingModelRunner(
model_input
.
seq_lens
)
return
dataclasses
.
replace
(
model_input
,
virtual_engine
=
virtual_engine
,
pooling_metadata
=
pooling_metadata
)
def
_prepare_pooling
(
...
...
vllm/worker/cpu_enc_dec_model_runner.py
View file @
2cebda42
...
...
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast
import
torch
from
vllm.attention
import
AttentionMetadata
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
...
...
@@ -96,11 +97,21 @@ class CPUEncoderDecoderModelRunner(
encoder_input_positions_tensor
,
)
=
self
.
_prepare_encoder_model_input_tensors
(
seq_group_metadata_list
,
model_input
)
# Sampling metadata is only required for the final pp group
generators
=
self
.
get_generators
(
finished_requests_ids
)
sampling_metadata
=
SamplingMetadata
.
prepare
(
seq_group_metadata_list
,
model_input
.
seq_lens
,
model_input
.
query_lens
,
self
.
device
,
pin_memory
=
False
,
generators
=
generators
)
return
dataclasses
.
replace
(
model_input
,
sampling_metadata
=
sampling_metadata
,
attn_metadata
=
attn_metadata
,
encoder_input_tokens
=
encoder_input_tokens_tensor
,
encoder_input_positions
=
encoder_input_positions_tensor
,
virtual_engine
=
virtual_engine
,
)
def
_prepare_encoder_model_input_tensors
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment