[Bugfix] Fix new Llama3.1 GGUF model loading (#7269)

8334c39f · Isotr0py · GitHub · e9045767 · 8334c39f
Unverified commit 8334c39f — authored Aug 09, 2024 by Isotr0py; committed by GitHub Aug 08, 2024
Show whitespace changes
Inline Side-by-side

Showing with 16 additions and 14 deletions

vllm/model_executor/model_loader/weight_utils.py (+16 -14)

No files found.
--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -435,6 +435,7 @@ def gguf_quant_weights_iterator(
    reader = gguf.GGUFReader(gguf_file)

    for tensor in reader.tensors:
+        if tensor.name in gguf_to_hf_name_map:
            weight_type = tensor.tensor_type
            name = gguf_to_hf_name_map[tensor.name]

@@ -444,6 +445,7 @@ def gguf_quant_weights_iterator(
                yield weight_type_name, weight_type

    for tensor in reader.tensors:
+        if tensor.name in gguf_to_hf_name_map:
            weight = tensor.data
            weight_type = tensor.tensor_type
            name = gguf_to_hf_name_map[tensor.name]