Unverified Commit 1fd0cf8a authored by Michael Feil, committed by GitHub

Update comment in qwen2.py (#4447)

parent bf63ee54
@@ -170,6 +170,7 @@ class Qwen2MoeAttention(nn.Module):
         rope_theta: float = 10000,
         rope_scaling: Optional[Dict[str, Any]] = None,
         max_position_embeddings: int = 8192,
+        qkv_bias: bool = True,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ) -> None:
@@ -201,7 +202,7 @@ class Qwen2MoeAttention(nn.Module):
             self.head_dim,
             self.total_num_heads,
             self.total_num_kv_heads,
-            bias=True,
+            bias=qkv_bias,
             quant_config=quant_config,
             prefix=add_prefix("qkv_proj", prefix),
         )
@@ -257,6 +258,8 @@ class Qwen2MoeDecoderLayer(nn.Module):
         rope_theta = getattr(config, "rope_theta", 10000)
         rope_scaling = getattr(config, "rope_scaling", None)
         max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
+        # note: replace config.num_hidden_layers < 80 with True once it's available in transformers 4.50.0
+        qkv_bias = getattr(config, "qkv_bias", config.num_hidden_layers < 80)
         self.self_attn = Qwen2MoeAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
@@ -266,6 +269,7 @@ class Qwen2MoeDecoderLayer(nn.Module):
             rope_scaling=rope_scaling,
             max_position_embeddings=max_position_embeddings,
             quant_config=quant_config,
+            qkv_bias=qkv_bias,
             prefix=add_prefix("self_attn", prefix),
         )
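For context, the net effect of this change is that the QKV projection bias is no longer hard-coded to True: the decoder layer reads qkv_bias from the model config when that attribute exists and otherwise falls back to a layer-count heuristic (fewer than 80 hidden layers implies biased QKV projections). Below is a minimal standalone sketch of that default-resolution logic; the helper name resolve_qkv_bias and the SimpleNamespace configs are hypothetical illustrations, not part of the repository.

from types import SimpleNamespace

def resolve_qkv_bias(config) -> bool:
    # Same lookup as in the diff: prefer an explicit qkv_bias attribute on the
    # config, otherwise fall back to the layer-count heuristic.
    return getattr(config, "qkv_bias", config.num_hidden_layers < 80)

# Hypothetical configs, for illustration only.
small = SimpleNamespace(num_hidden_layers=24)                    # no attribute -> True
large = SimpleNamespace(num_hidden_layers=94)                    # no attribute -> False
explicit = SimpleNamespace(num_hidden_layers=94, qkv_bias=True)  # explicit value wins -> True

print(resolve_qkv_bias(small), resolve_qkv_bias(large), resolve_qkv_bias(explicit))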