Unverified Commit 4026a049 authored by twaka's avatar twaka Committed by GitHub
Browse files

expand coverage of gpt2 model loading (#271)

parent 43710e8d
......@@ -228,11 +228,13 @@ class GPT2LMHeadModel(nn.Module):
# GPT-2 ties the weights of the embedding layer and the final
# linear layer.
continue
if ".attn.bias" in name:
if ".attn.bias" in name or ".attn.masked_bias" in name:
# Skip attention mask.
# NOTE: "c_attn.bias" should not be skipped.
continue
name = "transformer." + name
if not name.startswith("transformer."):
name = "transformer." + name
# The HF's GPT-2 implementation uses Conv1D instead of Linear.
# Because of this, we need to transpose the weights.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment