[Bugfix] Fix broken deepseek fp8 TP weights loading (#24367)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix] Fix broken deepseek fp8 TP weights loading (#24367)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
00a4e56d · Isotr0py · GitHub · 0eadaeff · 00a4e56d · 00a4e56d
Unverified commit 00a4e56d, authored Sep 07, 2025 by Isotr0py; committed by GitHub Sep 06, 2025
Showing 2 changed files with 5 additions and 2 deletions

vllm/model_executor/layers/linear.py +3 -1

vllm/model_executor/layers/quantization/fp8.py +2 -1

No files found.
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -262,7 +262,7 @@ class LinearBase(CustomOp):
        self.tp_size = (get_tensor_model_parallel_world_size()
                        if not disable_tp else 1)

-    def __post_init__(self):
+    def update_param_tp_status(self):
        for param in self.parameters():
            if isinstance(param, BasevLLMParameter):
                param.tp_rank = self.tp_rank
@@ -459,6 +459,7 @@ class ColumnParallelLinear(LinearBase):
            })
        else:
            self.register_parameter("bias", None)
+        self.update_param_tp_status()

    def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):

@@ -1250,6 +1251,7 @@ class RowParallelLinear(LinearBase):
            })
        else:
            self.register_parameter("bias", None)
+        self.update_param_tp_status()

    def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor):
        input_dim = getattr(param, "input_dim", None)

--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -270,7 +270,8 @@ class Fp8LinearMethod(LinearMethodBase):
        layer.weight_block_size = None

        if self.block_quant:
-            tp_size = get_tensor_model_parallel_world_size()
+            tp_size = getattr(layer, "tp_size",
+                              get_tensor_model_parallel_world_size())
            assert self.quant_config.weight_block_size is not None
            layer.weight_block_size = self.quant_config.weight_block_size
            block_n, block_k = (