Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
577bb34f
Unverified
Commit
577bb34f
authored
Nov 17, 2025
by
Li, Jiang
Committed by
GitHub
Nov 17, 2025
Browse files
[CPU][Bugfix] Fix _to_list in CPU model runner (#28824)
Signed-off-by:
jiang1.li
<
jiang1.li@intel.com
>
parent
3380ed5e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
3 deletions
+8
-3
csrc/cpu/torch_bindings.cpp
csrc/cpu/torch_bindings.cpp
+8
-0
vllm/v1/worker/cpu_model_runner.py
vllm/v1/worker/cpu_model_runner.py
+0
-3
No files found.
csrc/cpu/torch_bindings.cpp
View file @
577bb34f
...
...
@@ -100,6 +100,9 @@ void cpu_attention_with_kv_cache(
const
torch
::
Tensor
&
scheduler_metadata
,
const
std
::
optional
<
torch
::
Tensor
>&
s_aux
);
// Note: just for avoiding importing errors
void
placeholder_op
()
{
TORCH_CHECK
(
false
,
"Unimplemented"
);
}
TORCH_LIBRARY_EXPAND
(
TORCH_EXTENSION_NAME
,
ops
)
{
// vLLM custom ops
...
...
@@ -275,6 +278,11 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"sliding_window_left, SymInt sliding_window_right, Tensor block_table, "
"float softcap, Tensor sheduler_metadata, Tensor? s_aux) -> ()"
,
&
cpu_attention_with_kv_cache
);
// placeholders
ops
.
def
(
"static_scaled_fp8_quant() -> ()"
,
placeholder_op
);
ops
.
def
(
"dynamic_scaled_fp8_quant() -> ()"
,
placeholder_op
);
ops
.
def
(
"dynamic_per_token_scaled_fp8_quant() -> ()"
,
placeholder_op
);
}
TORCH_LIBRARY_EXPAND
(
CONCAT
(
TORCH_EXTENSION_NAME
,
_utils
),
utils
)
{
...
...
vllm/v1/worker/cpu_model_runner.py
View file @
577bb34f
...
...
@@ -80,9 +80,6 @@ class CPUModelRunner(GPUModelRunner):
def
_sync_device
(
self
)
->
None
:
pass
def
_to_list
(
self
,
sampled_token_ids
:
torch
.
Tensor
)
->
list
[
list
[
int
]]:
return
sampled_token_ids
.
tolist
()
def
get_dp_padding
(
self
,
num_tokens
:
int
)
->
tuple
[
int
,
torch
.
Tensor
|
None
]:
# Note: For CPU backend, dp padding is not required for now.
return
0
,
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment