Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d559979c
Unverified
Commit
d559979c
Authored Nov 22, 2024 by youkaichao
Committed by GitHub on Nov 22, 2024
Browse files
[bugfix] fix cpu tests (#10585)
Signed-off-by: youkaichao <youkaichao@gmail.com>
parent
d345f409
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 16 additions and 10 deletions
+16
-10
vllm/worker/cpu_embedding_model_runner.py
vllm/worker/cpu_embedding_model_runner.py
+3
-1
vllm/worker/cpu_enc_dec_model_runner.py
vllm/worker/cpu_enc_dec_model_runner.py
+3
-1
vllm/worker/cpu_model_runner.py
vllm/worker/cpu_model_runner.py
+10
-8
No files found.
vllm/worker/cpu_embedding_model_runner.py
View file @
d559979c
...
...
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
import
torch
from
vllm.forward_context
import
set_forward_context
from
vllm.model_executor.pooling_metadata
import
PoolingMetadata
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.pooling_params
import
PoolingParams
...
...
@@ -64,6 +65,7 @@ class CPUEmbeddingModelRunner(
intermediate_tensors
,
}
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
# Only perform pooling in the driver worker.
...
...
vllm/worker/cpu_enc_dec_model_runner.py
View file @
d559979c
...
...
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast
import
torch
from
vllm.attention
import
AttentionMetadata
from
vllm.forward_context
import
set_forward_context
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.multimodal
import
MultiModalKwargs
...
...
@@ -303,6 +304,7 @@ class CPUEncoderDecoderModelRunner(
intermediate_tensors
,
}
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
# Compute the logits.
...
...
vllm/worker/cpu_model_runner.py
View file @
d559979c
...
...
@@ -10,6 +10,7 @@ from torch import nn
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.config
import
VllmConfig
from
vllm.forward_context
import
set_forward_context
from
vllm.logger
import
init_logger
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.rotary_embedding
import
MRotaryEmbedding
...
...
@@ -487,6 +488,7 @@ class CPUModelRunner(CPUModelRunnerBase[ModelInputForCPUWithSamplingMetadata]):
multimodal_kwargs
=
MultiModalKwargs
.
as_kwargs
(
model_input
.
multi_modal_kwargs
,
device
=
self
.
device
)
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
hidden_states
=
model_executable
(
input_ids
=
model_input
.
input_tokens
,
positions
=
model_input
.
input_positions
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment