Unverified commit 42c19496, authored by Tsukasa OI, committed by GitHub
Browse files

[Bugfix][Quantization] Support BF16 tensors on GGUF (#29948)


Signed-off-by: Tsukasa OI <floss_llm@irq.a4lg.com>
parent cc4e296e
...@@ -47,6 +47,12 @@ QWEN2_CONFIG = GGUFTestConfig( ...@@ -47,6 +47,12 @@ QWEN2_CONFIG = GGUFTestConfig(
gguf_filename="qwen2.5-1.5b-instruct-q6_k.gguf", gguf_filename="qwen2.5-1.5b-instruct-q6_k.gguf",
) )
QWEN3_CONFIG = GGUFTestConfig(
original_model="Qwen/Qwen3-0.6B",
gguf_repo="unsloth/Qwen3-0.6B-GGUF",
gguf_filename="Qwen3-0.6B-BF16.gguf",
)
PHI3_CONFIG = GGUFTestConfig( PHI3_CONFIG = GGUFTestConfig(
original_model="microsoft/Phi-3.5-mini-instruct", original_model="microsoft/Phi-3.5-mini-instruct",
gguf_repo="bartowski/Phi-3.5-mini-instruct-GGUF", gguf_repo="bartowski/Phi-3.5-mini-instruct-GGUF",
...@@ -87,6 +93,7 @@ GEMMA3_CONFIG = GGUFTestConfig( ...@@ -87,6 +93,7 @@ GEMMA3_CONFIG = GGUFTestConfig(
MODELS = [ MODELS = [
# LLAMA_CONFIG, # broken: https://github.com/vllm-project/vllm/issues/19458 # LLAMA_CONFIG, # broken: https://github.com/vllm-project/vllm/issues/19458
QWEN2_CONFIG, QWEN2_CONFIG,
QWEN3_CONFIG,
PHI3_CONFIG, PHI3_CONFIG,
GPT2_CONFIG, GPT2_CONFIG,
STABLELM_CONFIG, STABLELM_CONFIG,
......
...@@ -921,7 +921,17 @@ def gguf_quant_weights_iterator( ...@@ -921,7 +921,17 @@ def gguf_quant_weights_iterator(
name = gguf_to_hf_name_map[tensor.name] name = gguf_to_hf_name_map[tensor.name]
if weight_type.name not in ("F32", "BF16", "F16"): if weight_type.name not in ("F32", "BF16", "F16"):
name = name.replace("weight", "qweight") name = name.replace("weight", "qweight")
param = torch.tensor(weight) if weight_type.name == "BF16" and tensor.data.dtype == np.uint8:
# BF16 is currently the only "quantization" type that isn't
# actually quantized but is read as a raw byte tensor.
# Reinterpret as `torch.bfloat16` tensor.
weight = weight.view(np.uint16)
if reader.byte_order == "S":
# GGUF endianness != system endianness
weight = weight.byteswap()
param = torch.tensor(weight).view(torch.bfloat16)
else:
param = torch.tensor(weight)
yield name, param yield name, param
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment