Unverified commit 4934d492, authored by Jong-hun Shin, committed by GitHub

Support GPT-NeoX Models without attention biases (#2301)

parent 358c328d
@@ -54,6 +54,7 @@ class GPTNeoXAttention(nn.Module):
         self.total_num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
         self.head_size = self.hidden_size // self.total_num_heads
+        self.bias = getattr(config, "attention_bias", True)
         tensor_model_parallel_world_size = (
             get_tensor_model_parallel_world_size())
@@ -65,11 +66,13 @@ class GPTNeoXAttention(nn.Module):
             config.hidden_size,
             self.head_size,
             self.total_num_heads,
+            bias=self.bias,
             linear_method=linear_method,
         )
         self.dense = RowParallelLinear(
             config.hidden_size,
             config.hidden_size,
+            bias=self.bias,
             linear_method=linear_method,
         )
         scaling = self.head_size**-0.5
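The pattern in this diff is a backward-compatible config toggle: older GPT-NeoX configs have no `attention_bias` attribute, so `getattr(config, "attention_bias", True)` defaults to biased projections and only newer bias-free checkpoints opt out. Below is a minimal sketch of that pattern, not the vLLM code itself: plain `torch.nn.Linear` stands in for vLLM's `ColumnParallelLinear`/`RowParallelLinear` (whose `linear_method` plumbing is omitted), and `ToyNeoXAttention` is a hypothetical name.

```python
# Minimal sketch (assumptions: plain nn.Linear in place of vLLM's
# tensor-parallel linear layers; ToyNeoXAttention is a hypothetical class).
import torch.nn as nn


class ToyNeoXAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        total_num_heads = config.num_attention_heads
        hidden_size = config.hidden_size
        self.head_size = hidden_size // total_num_heads
        # Older GPT-NeoX configs predate `attention_bias`, so default to
        # True to preserve their original (biased) behavior.
        self.bias = getattr(config, "attention_bias", True)
        # The fused QKV projection and the output projection share the flag,
        # mirroring the two `bias=self.bias` additions in the diff.
        self.query_key_value = nn.Linear(hidden_size, 3 * hidden_size,
                                         bias=self.bias)
        self.dense = nn.Linear(hidden_size, hidden_size, bias=self.bias)
        self.scaling = self.head_size ** -0.5


# Usage: a config without the flag keeps biases (backward compatible);
# a config with attention_bias=False builds bias-free projections.
from types import SimpleNamespace

cfg = SimpleNamespace(num_attention_heads=8, hidden_size=512)
assert ToyNeoXAttention(cfg).query_key_value.bias is not None

cfg_nobias = SimpleNamespace(num_attention_heads=8, hidden_size=512,
                             attention_bias=False)
assert ToyNeoXAttention(cfg_nobias).query_key_value.bias is None
```

Reading the flag once in `__init__` and threading it through both projections keeps the two layers consistent, which is why the diff stores it as `self.bias` rather than calling `getattr` at each construction site.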