Unverified commit f2bd246c, authored by Jani Monoses and committed by GitHub
Browse files

[VLM] Fix paligemma, fuyu and persimmon with transformers 4.45: use config.text_config.vocab_size (#8707)
parent a79e5229
...@@ -229,7 +229,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal): ...@@ -229,7 +229,7 @@ class FuyuForCausalLM(nn.Module, SupportsMultiModal):
self.multimodal_config = multimodal_config self.multimodal_config = multimodal_config
self.padding_idx = config.pad_token_id self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.text_config.vocab_size
self.image_token_id = _IMAGE_TOKEN_ID self.image_token_id = _IMAGE_TOKEN_ID
self.image_feature_size = config.patch_size**2 * config.num_channels self.image_feature_size = config.patch_size**2 * config.num_channels
......
...@@ -152,7 +152,8 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal): ...@@ -152,7 +152,8 @@ class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal):
self.unpadded_vocab_size = config.text_config.vocab_size self.unpadded_vocab_size = config.text_config.vocab_size
logit_scale = getattr(config, "logit_scale", 1.0) logit_scale = getattr(config, "logit_scale", 1.0)
self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
config.vocab_size, logit_scale) config.text_config.vocab_size,
logit_scale)
self.sampler = Sampler() self.sampler = Sampler()
def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor: def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
......
...@@ -213,10 +213,10 @@ class PersimmonModel(nn.Module): ...@@ -213,10 +213,10 @@ class PersimmonModel(nn.Module):
cache_config: Optional[CacheConfig] = None, cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None): quant_config: Optional[QuantizationConfig] = None):
super().__init__() super().__init__()
self.vocab_size = config.vocab_size self.vocab_size = config.text_config.vocab_size
self.embed_tokens = VocabParallelEmbedding(config.vocab_size, self.embed_tokens = VocabParallelEmbedding(
config.hidden_size) config.text_config.vocab_size, config.hidden_size)
self.layers = nn.ModuleList([ self.layers = nn.ModuleList([
PersimmonDecoderLayer(config, PersimmonDecoderLayer(config,
cache_config=cache_config, cache_config=cache_config,
...@@ -257,14 +257,14 @@ class PersimmonForCausalLM(nn.Module): ...@@ -257,14 +257,14 @@ class PersimmonForCausalLM(nn.Module):
quant_config: Optional[QuantizationConfig] = None): quant_config: Optional[QuantizationConfig] = None):
super().__init__() super().__init__()
self.config = config self.config = config
self.vocab_size = config.vocab_size self.vocab_size = config.text_config.vocab_size
self.model = PersimmonModel(config, self.model = PersimmonModel(config,
cache_config=cache_config, cache_config=cache_config,
quant_config=quant_config) quant_config=quant_config)
self.lm_head = ParallelLMHead(config.vocab_size, self.lm_head = ParallelLMHead(config.text_config.vocab_size,
config.hidden_size, config.hidden_size,
bias=False) bias=False)
self.logits_processor = LogitsProcessor(config.vocab_size) self.logits_processor = LogitsProcessor(config.text_config.vocab_size)
self.sampler = Sampler() self.sampler = Sampler()
def forward( def forward(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment