[Bugfix] Fix Qwen3.5-FP8 Weight Loading Error on TPU (#37348)

Signed-off-by: Jacob Platin <jacobplatin@google.com>

[Bugfix] Fix Qwen3.5-FP8 Weight Loading Error on TPU (#37348)
Signed-off-by: Jacob Platin <jacobplatin@google.com>
d7d51a7e · Jacob Platin · GitHub · 3c3c0842 · d7d51a7e
Unverified commit d7d51a7e, authored Mar 25, 2026 by Jacob Platin; committed by GitHub on Mar 26, 2026.
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 0 deletions

vllm/model_executor/layers/linear.py vllm/model_executor/layers/linear.py +14 -0

No files found.
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -768,6 +768,13 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
                # Special case for Quantization.
                # If quantized, we need to adjust the offset and size to account
                # for the packing.
+                # FP8 block scales: adjust shard size/offset using weight_block_size.
+                if isinstance(param, BlockQuantScaleParameter):
+                    weight_block_size = getattr(self, "weight_block_size", None)
+                    shard_size, shard_offset = adjust_block_scale_shard(
+                        weight_block_size, shard_size, shard_offset
+                    )
+
                if packed_dim == output_dim:
                    shard_size = shard_size // param.packed_factor
                    shard_offset = shard_offset // param.packed_factor
@@ -1218,6 +1225,13 @@ class QKVParallelLinear(ColumnParallelLinear):
                # Special case for Quantized Weights.
                # If quantized, we need to adjust the offset and size to account
                # for the packing.
+                # FP8 block scales: adjust shard size/offset using weight_block_size.
+                if isinstance(param, BlockQuantScaleParameter):
+                    weight_block_size = getattr(self, "weight_block_size", None)
+                    shard_size, shard_offset = adjust_block_scale_shard(
+                        weight_block_size, shard_size, shard_offset
+                    )
+
                if packed_dim == output_dim:
                    shard_size = shard_size // param.packed_factor
                    shard_offset = shard_offset // param.packed_factor