Unverified commit c38b8d5a, authored by Harry Mellor and committed by GitHub
Browse files

Remove `padding_index` from models that don't use it for better Transformers...


Remove `padding_index` from models that don't use it for better Transformers v5 compatibility (#35189)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent 60da0e15
...@@ -421,7 +421,6 @@ class Ernie4_5_MoeModel(nn.Module): ...@@ -421,7 +421,6 @@ class Ernie4_5_MoeModel(nn.Module):
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
quant_config = vllm_config.quant_config quant_config = vllm_config.quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.config = config self.config = config
parallel_config = vllm_config.parallel_config parallel_config = vllm_config.parallel_config
......
...@@ -523,7 +523,6 @@ class Ernie4_5_VLMoeModel(nn.Module): ...@@ -523,7 +523,6 @@ class Ernie4_5_VLMoeModel(nn.Module):
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
quant_config = vllm_config.quant_config quant_config = vllm_config.quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.config = config self.config = config
......
...@@ -157,7 +157,6 @@ class GraniteMoeSharedModel(nn.Module): ...@@ -157,7 +157,6 @@ class GraniteMoeSharedModel(nn.Module):
self.config = config self.config = config
self.quant_config = quant_config # Required by MixtralModel self.quant_config = quant_config # Required by MixtralModel
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
......
...@@ -451,7 +451,6 @@ class Grok1Model(nn.Module): ...@@ -451,7 +451,6 @@ class Grok1Model(nn.Module):
self.config = config self.config = config
self.quant_config = quant_config self.quant_config = quant_config
self.padding_idx = config.pad_token_id
# Store expert naming for weight loading # Store expert naming for weight loading
self.ckpt_gate_proj_name = ckpt_gate_proj_name self.ckpt_gate_proj_name = ckpt_gate_proj_name
......
...@@ -600,7 +600,6 @@ class HunYuanModel(nn.Module): ...@@ -600,7 +600,6 @@ class HunYuanModel(nn.Module):
self.config = config self.config = config
self.quant_config = quant_config self.quant_config = quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
......
...@@ -305,7 +305,6 @@ class Jais2Model(nn.Module): ...@@ -305,7 +305,6 @@ class Jais2Model(nn.Module):
self.config = config self.config = config
self.quant_config = quant_config self.quant_config = quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.org_vocab_size = config.vocab_size self.org_vocab_size = config.vocab_size
......
...@@ -393,7 +393,6 @@ class KimiLinearModel(nn.Module): ...@@ -393,7 +393,6 @@ class KimiLinearModel(nn.Module):
parallel_config = vllm_config.parallel_config parallel_config = vllm_config.parallel_config
self.config = config self.config = config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
if get_pp_group().is_first_rank: if get_pp_group().is_first_rank:
......
...@@ -486,7 +486,6 @@ class FlashModel(nn.Module): ...@@ -486,7 +486,6 @@ class FlashModel(nn.Module):
quant_config = vllm_config.quant_config quant_config = vllm_config.quant_config
self.config = config self.config = config
self.padding_idx = getattr(config, "pad_token_id", None)
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
if get_pp_group().is_first_rank: if get_pp_group().is_first_rank:
......
...@@ -495,7 +495,6 @@ class MiniMaxText01Model(nn.Module): ...@@ -495,7 +495,6 @@ class MiniMaxText01Model(nn.Module):
cache_config = vllm_config.cache_config cache_config = vllm_config.cache_config
scheduler_config = vllm_config.scheduler_config scheduler_config = vllm_config.scheduler_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.decoder_attention_types = getattr( self.decoder_attention_types = getattr(
......
...@@ -241,7 +241,6 @@ class DeciModel(nn.Module): ...@@ -241,7 +241,6 @@ class DeciModel(nn.Module):
self.config = config self.config = config
self.quant_config = quant_config self.quant_config = quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
......
...@@ -1029,7 +1029,6 @@ class OpenPanguModel(nn.Module): ...@@ -1029,7 +1029,6 @@ class OpenPanguModel(nn.Module):
self.config = config self.config = config
self.num_redundant_experts = eplb_config.num_redundant_experts self.num_redundant_experts = eplb_config.num_redundant_experts
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
if get_pp_group().is_first_rank or ( if get_pp_group().is_first_rank or (
......
...@@ -748,7 +748,6 @@ class Plamo2Model(torch.nn.Module): ...@@ -748,7 +748,6 @@ class Plamo2Model(torch.nn.Module):
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
self.config = config self.config = config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.embed_tokens = VocabParallelEmbedding( self.embed_tokens = VocabParallelEmbedding(
......
...@@ -317,7 +317,6 @@ class Plamo3Model(nn.Module): ...@@ -317,7 +317,6 @@ class Plamo3Model(nn.Module):
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
self.config = config self.config = config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.org_vocab_size = config.vocab_size self.org_vocab_size = config.vocab_size
......
...@@ -443,7 +443,6 @@ class Qwen3MoeModel(nn.Module): ...@@ -443,7 +443,6 @@ class Qwen3MoeModel(nn.Module):
eplb_config = parallel_config.eplb_config eplb_config = parallel_config.eplb_config
self.num_redundant_experts = eplb_config.num_redundant_experts self.num_redundant_experts = eplb_config.num_redundant_experts
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size self.vocab_size = config.vocab_size
self.config = config self.config = config
self.quant_config = quant_config self.quant_config = quant_config
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment