Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d559979c
Unverified
Commit
d559979c
authored
Nov 22, 2024
by
youkaichao
Committed by
GitHub
Nov 22, 2024
Browse files
[bugfix] fix cpu tests (#10585)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
d345f409
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
10 deletions
+16
-10
vllm/worker/cpu_embedding_model_runner.py
vllm/worker/cpu_embedding_model_runner.py
+3
-1
vllm/worker/cpu_enc_dec_model_runner.py
vllm/worker/cpu_enc_dec_model_runner.py
+3
-1
vllm/worker/cpu_model_runner.py
vllm/worker/cpu_model_runner.py
+10
-8
No files found.
vllm/worker/cpu_embedding_model_runner.py
View file @
d559979c
...
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
...
@@ -3,6 +3,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type, Union
import
torch
import
torch
from
vllm.forward_context
import
set_forward_context
from
vllm.model_executor.pooling_metadata
import
PoolingMetadata
from
vllm.model_executor.pooling_metadata
import
PoolingMetadata
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.pooling_params
import
PoolingParams
from
vllm.pooling_params
import
PoolingParams
...
@@ -64,7 +65,8 @@ class CPUEmbeddingModelRunner(
...
@@ -64,7 +65,8 @@ class CPUEmbeddingModelRunner(
intermediate_tensors
,
intermediate_tensors
,
}
}
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
# Only perform pooling in the driver worker.
# Only perform pooling in the driver worker.
if
not
self
.
is_driver_worker
:
if
not
self
.
is_driver_worker
:
...
...
vllm/worker/cpu_enc_dec_model_runner.py
View file @
d559979c
...
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast
...
@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast
import
torch
import
torch
from
vllm.attention
import
AttentionMetadata
from
vllm.attention
import
AttentionMetadata
from
vllm.forward_context
import
set_forward_context
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.multimodal
import
MultiModalKwargs
from
vllm.multimodal
import
MultiModalKwargs
...
@@ -303,7 +304,8 @@ class CPUEncoderDecoderModelRunner(
...
@@ -303,7 +304,8 @@ class CPUEncoderDecoderModelRunner(
intermediate_tensors
,
intermediate_tensors
,
}
}
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
hidden_states
=
model_executable
(
**
execute_model_kwargs
)
# Compute the logits.
# Compute the logits.
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
...
...
vllm/worker/cpu_model_runner.py
View file @
d559979c
...
@@ -10,6 +10,7 @@ from torch import nn
...
@@ -10,6 +10,7 @@ from torch import nn
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.attention
import
AttentionMetadata
,
get_attn_backend
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.forward_context
import
set_forward_context
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.rotary_embedding
import
MRotaryEmbedding
from
vllm.model_executor.layers.rotary_embedding
import
MRotaryEmbedding
...
@@ -487,14 +488,15 @@ class CPUModelRunner(CPUModelRunnerBase[ModelInputForCPUWithSamplingMetadata]):
...
@@ -487,14 +488,15 @@ class CPUModelRunner(CPUModelRunnerBase[ModelInputForCPUWithSamplingMetadata]):
multimodal_kwargs
=
MultiModalKwargs
.
as_kwargs
(
multimodal_kwargs
=
MultiModalKwargs
.
as_kwargs
(
model_input
.
multi_modal_kwargs
,
device
=
self
.
device
)
model_input
.
multi_modal_kwargs
,
device
=
self
.
device
)
hidden_states
=
model_executable
(
with
set_forward_context
(
model_input
.
attn_metadata
,
self
.
vllm_config
):
input_ids
=
model_input
.
input_tokens
,
hidden_states
=
model_executable
(
positions
=
model_input
.
input_positions
,
input_ids
=
model_input
.
input_tokens
,
kv_caches
=
kv_caches
,
positions
=
model_input
.
input_positions
,
attn_metadata
=
model_input
.
attn_metadata
,
kv_caches
=
kv_caches
,
intermediate_tensors
=
intermediate_tensors
,
attn_metadata
=
model_input
.
attn_metadata
,
**
multimodal_kwargs
,
intermediate_tensors
=
intermediate_tensors
,
)
**
multimodal_kwargs
,
)
# Compute the logits.
# Compute the logits.
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
logits
=
self
.
model
.
compute_logits
(
hidden_states
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment