Unverified commit 4f5b059f, authored by Tyler Michael Smith, committed by GitHub
Browse files

Clean up unused padding_idx variables across many model definitions (#13240)


Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
parent 288ca110
......@@ -300,7 +300,6 @@ class NemotronModel(nn.Module):
lora_config = vllm_config.lora_config
self.config = config
self.padding_idx = config.pad_token_id
lora_vocab = (lora_config.lora_extra_vocab_size *
(lora_config.max_loras or 1)) if lora_config else 0
self.vocab_size = config.vocab_size + lora_vocab
......
......@@ -252,7 +252,6 @@ class OlmoeModel(nn.Module):
cache_config = vllm_config.cache_config
quant_config = vllm_config.quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
self.embed_tokens = VocabParallelEmbedding(
......
......@@ -200,7 +200,6 @@ class OPTDecoder(nn.Module):
):
super().__init__()
self.config = config
self.padding_idx = config.pad_token_id
self.max_target_positions = config.max_position_embeddings
self.vocab_size = config.vocab_size
......
......@@ -217,7 +217,6 @@ class OrionModel(nn.Module):
quant_config = vllm_config.quant_config
self.config = config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
self.embed_tokens = VocabParallelEmbedding(
config.vocab_size,
......
......@@ -441,7 +441,6 @@ class PhiMoEModel(nn.Module):
quant_config = vllm_config.quant_config
lora_config = vllm_config.lora_config
self.padding_idx = config.pad_token_id
lora_vocab = ((lora_config.lora_extra_vocab_size *
(lora_config.max_loras or 1)) if lora_config else 0)
self.vocab_size = config.vocab_size + lora_vocab
......
......@@ -284,7 +284,6 @@ class Qwen2Model(nn.Module):
self.config = config
self.quant_config = quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
if get_pp_group().is_first_rank or (config.tie_word_embeddings
......
......@@ -325,7 +325,6 @@ class Qwen2MoeModel(nn.Module):
cache_config = vllm_config.cache_config
quant_config = vllm_config.quant_config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
self.embed_tokens = VocabParallelEmbedding(
......
......@@ -269,7 +269,6 @@ class SolarModel(nn.Module):
lora_config = vllm_config.lora_config
self.config = config
self.padding_idx = config.pad_token_id
lora_vocab = ((lora_config.lora_extra_vocab_size *
(lora_config.max_loras or 1)) if lora_config else 0)
self.vocab_size = config.vocab_size + lora_vocab
......
......@@ -212,10 +212,8 @@ class Starcoder2Model(nn.Module):
quant_config = vllm_config.quant_config
self.config = config
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
# TODO: consider padding_idx (currently removed)
self.embed_tokens = VocabParallelEmbedding(
config.vocab_size,
config.hidden_size,
......
......@@ -49,10 +49,7 @@ class WhisperAudioInputs(TypedDict):
class WhisperPositionalEmbedding(nn.Embedding):
def __init__(self,
num_positions: int,
embedding_dim: int,
padding_idx: Optional[int] = None):
def __init__(self, num_positions: int, embedding_dim: int):
super().__init__(num_positions, embedding_dim)
def forward(self, position_ids):
......@@ -359,7 +356,6 @@ class WhisperEncoder(nn.Module):
config = vllm_config.model_config.hf_config
embed_dim = config.d_model
self.num_mel_bins = config.num_mel_bins
self.padding_idx = config.pad_token_id
self.max_source_positions = config.max_source_positions
self.embed_scale = (math.sqrt(embed_dim)
if config.scale_embedding else 1.0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment