Commit ad038b4e authored by zhuwenwen
Browse files

update utils.py

parent 78d833ae
......@@ -18,7 +18,6 @@ if TYPE_CHECKING:
from vllm.attention.backends.abstract import AttentionImpl
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.worker.gpu_input_batch import InputBatch
from vllm.v1.attention.backends.mla.common import MLACommonMetadataBuilder
import vllm.envs as envs
from vllm.attention.backends.abstract import AttentionBackend
......@@ -631,7 +630,6 @@ def reorder_batch_to_split_decodes_and_prefills(
input_batch: "InputBatch",
scheduler_output: "SchedulerOutput",
decode_threshold: int = 1,
# num_scheduled_tokens_np: np.ndarray = np.zeros(256, dtype=np.int32),
) -> bool:
"""
Reorders the batch to split into prefill and decode requests; places all
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment