Unverified commit 577bb34f, authored by Li, Jiang and committed by GitHub
Browse files

[CPU][Bugfix] Fix _to_list in CPU model runner (#28824)


Signed-off-by: jiang1.li <jiang1.li@intel.com>
parent 3380ed5e
...@@ -100,6 +100,9 @@ void cpu_attention_with_kv_cache( ...@@ -100,6 +100,9 @@ void cpu_attention_with_kv_cache(
const torch::Tensor& scheduler_metadata, const torch::Tensor& scheduler_metadata,
const std::optional<torch::Tensor>& s_aux); const std::optional<torch::Tensor>& s_aux);
// Stub used only so that op names can be registered and importing code that
// looks them up does not fail. It has no real implementation: the check below
// is unconditionally false, so any call reports "Unimplemented".
void placeholder_op() {
  TORCH_CHECK(false, "Unimplemented");
}
TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
// vLLM custom ops // vLLM custom ops
...@@ -275,6 +278,11 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { ...@@ -275,6 +278,11 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
"sliding_window_left, SymInt sliding_window_right, Tensor block_table, " "sliding_window_left, SymInt sliding_window_right, Tensor block_table, "
"float softcap, Tensor sheduler_metadata, Tensor? s_aux) -> ()", "float softcap, Tensor sheduler_metadata, Tensor? s_aux) -> ()",
&cpu_attention_with_kv_cache); &cpu_attention_with_kv_cache);
// placeholders
ops.def("static_scaled_fp8_quant() -> ()", placeholder_op);
ops.def("dynamic_scaled_fp8_quant() -> ()", placeholder_op);
ops.def("dynamic_per_token_scaled_fp8_quant() -> ()", placeholder_op);
} }
TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _utils), utils) { TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _utils), utils) {
......
...@@ -80,9 +80,6 @@ class CPUModelRunner(GPUModelRunner): ...@@ -80,9 +80,6 @@ class CPUModelRunner(GPUModelRunner):
def _sync_device(self) -> None: def _sync_device(self) -> None:
pass pass
# Convert the sampled token ids tensor into nested Python lists by calling
# Tensor.tolist() on it directly.
# NOTE(review): these lines appear only once in the side-by-side view under the
# hunk header "-80,9 +80,6", i.e. they look like the removed side of the diff —
# confirm the override is no longer part of CPUModelRunner.
def _to_list(self, sampled_token_ids: torch.Tensor) -> list[list[int]]:
return sampled_token_ids.tolist()
def get_dp_padding(self, num_tokens: int) -> tuple[int, torch.Tensor | None]: def get_dp_padding(self, num_tokens: int) -> tuple[int, torch.Tensor | None]:
# Note: For CPU backend, dp padding is not required for now. # Note: For CPU backend, dp padding is not required for now.
return 0, None return 0, None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment