Unverified commit bc0c0192, authored by Kiran R, committed by GitHub
Browse files

[Bugfix] Enable Proper `attention_bias` Usage in Llama Model Configuration (#3767)


Co-authored-by: roy <jasonailu87@gmail.com>
parent f46864d6
...@@ -184,6 +184,10 @@ class LlamaDecoderLayer(nn.Module): ...@@ -184,6 +184,10 @@ class LlamaDecoderLayer(nn.Module):
max_position_embeddings = getattr(config, "max_position_embeddings", max_position_embeddings = getattr(config, "max_position_embeddings",
8192) 8192)
sliding_window = getattr(config, "sliding_window", None) sliding_window = getattr(config, "sliding_window", None)
# Support abacusai/Smaug-72B-v0.1 with attention_bias
# Support internlm/internlm-7b with bias
attention_bias = getattr(config, "attention_bias", False) or getattr(
config, "bias", False)
self.self_attn = LlamaAttention( self.self_attn = LlamaAttention(
hidden_size=self.hidden_size, hidden_size=self.hidden_size,
num_heads=config.num_attention_heads, num_heads=config.num_attention_heads,
...@@ -193,7 +197,7 @@ class LlamaDecoderLayer(nn.Module): ...@@ -193,7 +197,7 @@ class LlamaDecoderLayer(nn.Module):
rope_scaling=rope_scaling, rope_scaling=rope_scaling,
max_position_embeddings=max_position_embeddings, max_position_embeddings=max_position_embeddings,
linear_method=linear_method, linear_method=linear_method,
bias=getattr(config, "bias", False), bias=attention_bias,
sliding_window=sliding_window, sliding_window=sliding_window,
) )
self.mlp = LlamaMLP( self.mlp = LlamaMLP(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment