Unverified commit 11fd7ea6, authored by Patrick von Platen, committed by GitHub
Browse files

[Pixtral-Large] Pixtral actually has no bias in vision-lang adapter (#10449)

parent f028dff3
...@@ -331,6 +331,7 @@ class VisionEncoderArgs: ...@@ -331,6 +331,7 @@ class VisionEncoderArgs:
num_attention_heads: int num_attention_heads: int
rope_theta: float # for rope-2D rope_theta: float # for rope-2D
image_token_id: int image_token_id: int
adapter_bias: bool = True
def _reshape_for_broadcast(freqs_cis: torch.Tensor, def _reshape_for_broadcast(freqs_cis: torch.Tensor,
...@@ -595,10 +596,10 @@ class VisionLanguageAdapter(nn.Module): ...@@ -595,10 +596,10 @@ class VisionLanguageAdapter(nn.Module):
self.w_in = nn.Linear( self.w_in = nn.Linear(
args.hidden_size, args.hidden_size,
dim, dim,
bias=True, bias=args.adapter_bias,
) )
self.gelu = nn.GELU() self.gelu = nn.GELU()
self.w_out = nn.Linear(dim, dim, bias=True) self.w_out = nn.Linear(dim, dim, bias=args.adapter_bias)
def forward(self, x: torch.Tensor) -> torch.Tensor: def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.w_out(self.gelu(self.w_in(x))) return self.w_out(self.gelu(self.w_in(x)))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment