Unverified commit 9b60e2ff, authored by Rishapveer Singh, committed by GitHub
Browse files

[Bugfix] Fix quantized model initialization failure with prefetch offloading (#40432)


Signed-off-by: Rishapveer Singh <singhrishapveer@gmail.com>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 3951d3ea
...@@ -21,6 +21,7 @@ import torch.nn as nn ...@@ -21,6 +21,7 @@ import torch.nn as nn
import vllm.model_executor.offloader.prefetch_ops # noqa: F401 import vllm.model_executor.offloader.prefetch_ops # noqa: F401
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor.offloader.base import BaseOffloader, should_pin_memory from vllm.model_executor.offloader.base import BaseOffloader, should_pin_memory
from vllm.utils.torch_utils import get_dtype_size
logger = init_logger(__name__) logger = init_logger(__name__)
...@@ -53,7 +54,7 @@ class ParamInfo: ...@@ -53,7 +54,7 @@ class ParamInfo:
numel = 1 numel = 1
for dim in self.shape: for dim in self.shape:
numel *= dim numel *= dim
return numel * torch.finfo(self.dtype).bits // 8 return numel * get_dtype_size(self.dtype)
class StaticBufferPool: class StaticBufferPool:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment