Unverified commit 2f41d6c0, authored by Ajay Anubolu and committed by GitHub
Browse files

[Bugfix] Fix cpu-offload-gb assertion with non-default block sizes (#36461)


Signed-off-by: AjAnubolu <anuboluajay@gmail.com>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
parent 3aecdf08
......@@ -6517,11 +6517,6 @@ class GPUModelRunner(
block_sizes != self._init_block_sizes
or kernel_block_sizes != self._init_kernel_block_sizes
):
assert self.offload_config.uva.cpu_offload_gb == 0, (
"Cannot re-initialize the input batch when CPU weight "
"offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501
"for more details."
)
self._init_block_sizes = block_sizes
self._init_kernel_block_sizes = kernel_block_sizes
self.input_batch = InputBatch(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment