Commit eefa41c1 authored by zhuwenwen
Browse files

sync v0.18.0

parent 82155c76
...@@ -338,7 +338,7 @@ class MixtralModel(nn.Module): ...@@ -338,7 +338,7 @@ class MixtralModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -574,7 +574,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts): ...@@ -574,7 +574,7 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -875,7 +875,7 @@ class Llama4ForConditionalGeneration( ...@@ -875,7 +875,7 @@ class Llama4ForConditionalGeneration(
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -1157,4 +1157,4 @@ class Llama4ForConditionalGeneration( ...@@ -1157,4 +1157,4 @@ class Llama4ForConditionalGeneration(
return 0 return 0
num_chunks = num_vision_tokens // (raw_patches + 1) num_chunks = num_vision_tokens // (raw_patches + 1)
patches_per_chunk = Mllama4ProcessingInfo.get_patch_per_chunk(vision_config) patches_per_chunk = Mllama4ProcessingInfo.get_patch_per_chunk(vision_config)
return num_chunks * patches_per_chunk return num_chunks * patches_per_chunk
\ No newline at end of file
...@@ -54,12 +54,11 @@ class ModernBertEmbeddings(nn.Module): ...@@ -54,12 +54,11 @@ class ModernBertEmbeddings(nn.Module):
input_ids: torch.Tensor, input_ids: torch.Tensor,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
) -> torch.Tensor: ) -> torch.Tensor:
if inputs_embeds is not None: if inputs_embeds is None:
return self.norm(inputs_embeds)
else:
inputs_embeds = self.tok_embeddings(input_ids) inputs_embeds = self.tok_embeddings(input_ids)
embeddings = self.norm(inputs_embeds)
return embeddings embeddings = self.norm(inputs_embeds)
return embeddings
class ModernBertAttention(nn.Module): class ModernBertAttention(nn.Module):
......
...@@ -876,7 +876,7 @@ class MolmoModel(nn.Module, SupportsQuant): ...@@ -876,7 +876,7 @@ class MolmoModel(nn.Module, SupportsQuant):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -1206,7 +1206,7 @@ class Molmo2TextModel(nn.Module, SupportsQuant): ...@@ -1206,7 +1206,7 @@ class Molmo2TextModel(nn.Module, SupportsQuant):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -253,7 +253,7 @@ class MPTModel(nn.Module): ...@@ -253,7 +253,7 @@ class MPTModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
position_ids: torch.Tensor, position_ids: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -313,7 +313,7 @@ class MPTForCausalLM(nn.Module, SupportsPP): ...@@ -313,7 +313,7 @@ class MPTForCausalLM(nn.Module, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -2293,7 +2293,7 @@ class NemotronH_Nano_VL_V2( ...@@ -2293,7 +2293,7 @@ class NemotronH_Nano_VL_V2(
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -2359,7 +2359,7 @@ class NemotronH_Nano_VL_V2( ...@@ -2359,7 +2359,7 @@ class NemotronH_Nano_VL_V2(
with torch.no_grad(): with torch.no_grad():
default_weight_loader(param, w) default_weight_loader(param, w)
elif is_vision_weights(name): elif is_vision_weights(name):
# Convert: vision_model.radio_model.* → radio_model.* # Convert: vision_model.radio_model.* → radio_model.*
hf_key = name[len("vision_model.") :] # Remove "vision_model." prefix hf_key = name[len("vision_model.") :] # Remove "vision_model." prefix
vision_weights.append((hf_key, w)) vision_weights.append((hf_key, w))
elif is_sound_weights(name): elif is_sound_weights(name):
...@@ -2419,4 +2419,4 @@ class NemotronH_Nano_VL_V2( ...@@ -2419,4 +2419,4 @@ class NemotronH_Nano_VL_V2(
@classmethod @classmethod
def get_mamba_state_copy_func(cls): def get_mamba_state_copy_func(cls):
return NemotronHForCausalLM.get_mamba_state_copy_func() return NemotronHForCausalLM.get_mamba_state_copy_func()
\ No newline at end of file
...@@ -477,7 +477,7 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP): ...@@ -477,7 +477,7 @@ class NemotronForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -608,7 +608,7 @@ class NemotronHModel(nn.Module): ...@@ -608,7 +608,7 @@ class NemotronHModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -933,7 +933,7 @@ class NemotronHForCausalLM( ...@@ -933,7 +933,7 @@ class NemotronHForCausalLM(
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -453,7 +453,7 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps): ...@@ -453,7 +453,7 @@ class DeciLMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, HasNoOps):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -290,7 +290,7 @@ class MBartDecoderNoPos(nn.Module): ...@@ -290,7 +290,7 @@ class MBartDecoderNoPos(nn.Module):
def forward( def forward(
self, self,
decoder_input_ids: torch.Tensor, decoder_input_ids: torch.Tensor | None,
*, *,
encoder_hidden_states: torch.Tensor | None, encoder_hidden_states: torch.Tensor | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -901,7 +901,7 @@ class NemotronParseForConditionalGeneration(nn.Module, SupportsMultiModal): ...@@ -901,7 +901,7 @@ class NemotronParseForConditionalGeneration(nn.Module, SupportsMultiModal):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
encoder_outputs: list[torch.Tensor] | None = None, encoder_outputs: list[torch.Tensor] | None = None,
**kwargs, **kwargs,
......
...@@ -644,7 +644,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, Suppor ...@@ -644,7 +644,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, Suppor
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -941,4 +941,4 @@ class LlamaNemotronVLForSequenceClassification( ...@@ -941,4 +941,4 @@ class LlamaNemotronVLForSequenceClassification(
loaded_weights.add(name) loaded_weights.add(name)
return loaded_weights return loaded_weights
\ No newline at end of file
...@@ -271,7 +271,7 @@ class OlmoModel(nn.Module): ...@@ -271,7 +271,7 @@ class OlmoModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -382,7 +382,7 @@ class OlmoForCausalLM(nn.Module, SupportsPP, SupportsLoRA): ...@@ -382,7 +382,7 @@ class OlmoForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -309,7 +309,7 @@ class Olmo2Model(nn.Module): ...@@ -309,7 +309,7 @@ class Olmo2Model(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -424,7 +424,7 @@ class Olmo2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA): ...@@ -424,7 +424,7 @@ class Olmo2ForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -300,7 +300,7 @@ class OlmoeModel(nn.Module): ...@@ -300,7 +300,7 @@ class OlmoeModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -476,7 +476,7 @@ class OlmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA): ...@@ -476,7 +476,7 @@ class OlmoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -1062,7 +1062,7 @@ class OpenPanguModel(nn.Module): ...@@ -1062,7 +1062,7 @@ class OpenPanguModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -1292,7 +1292,7 @@ class OpenPanguModelBase(nn.Module, SupportsPP, SupportsLoRA): ...@@ -1292,7 +1292,7 @@ class OpenPanguModelBase(nn.Module, SupportsPP, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -104,7 +104,7 @@ class OpenPanguMTP(nn.Module): ...@@ -104,7 +104,7 @@ class OpenPanguMTP(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
......
...@@ -267,7 +267,7 @@ class OPTDecoder(nn.Module): ...@@ -267,7 +267,7 @@ class OPTDecoder(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -316,7 +316,7 @@ class OPTModel(nn.Module): ...@@ -316,7 +316,7 @@ class OPTModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -399,7 +399,7 @@ class OPTForCausalLM(nn.Module, SupportsPP, SupportsLoRA): ...@@ -399,7 +399,7 @@ class OPTForCausalLM(nn.Module, SupportsPP, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -253,7 +253,7 @@ class OrionModel(nn.Module): ...@@ -253,7 +253,7 @@ class OrionModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None, intermediate_tensors: IntermediateTensors | None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -343,7 +343,7 @@ class OrionForCausalLM(nn.Module, SupportsPP): ...@@ -343,7 +343,7 @@ class OrionForCausalLM(nn.Module, SupportsPP):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
...@@ -357,7 +357,7 @@ class OuroModel(nn.Module): ...@@ -357,7 +357,7 @@ class OuroModel(nn.Module):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
...@@ -482,7 +482,7 @@ class OuroForCausalLM(nn.Module, SupportsLoRA): ...@@ -482,7 +482,7 @@ class OuroForCausalLM(nn.Module, SupportsLoRA):
def forward( def forward(
self, self,
input_ids: torch.Tensor, input_ids: torch.Tensor | None,
positions: torch.Tensor, positions: torch.Tensor,
intermediate_tensors: IntermediateTensors | None = None, intermediate_tensors: IntermediateTensors | None = None,
inputs_embeds: torch.Tensor | None = None, inputs_embeds: torch.Tensor | None = None,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment