Unverified Commit 5d29530e authored by nakranivaibhav's avatar nakranivaibhav Committed by GitHub
Browse files

Improved type hinting for all attention parameters (#28479)

* Changed type hinting for all attention inputs to 'Optional[Tuple[torch.FloatTensor,...]] = None'

* Fixed the ruff formatting issue

* fixed type hinting for all hidden_states to 'Optional[Tuple[torch.FloatTensor, ...]] = None'

* Changed type hinting in these 12 scripts modeling_dpr.py,modeling_nat.py,idefics/vision.py,modeling_tf_dpr.py,modeling_luke.py,modeling_swin.py,modeling_tf_swin.py,modeling_blip.py,modeling_tf_blip.py,modeling_donut_swin.py,modeling_dinat.py,modeling_swinv2.py

* test fail update

* fixed type hinting for these 15 scripts modeling_xlnet.py,modeling_tf_xlnet.py,modeling_led.py,modeling_tf_led.py,modleing_rwkv.py,modeling_dpt.py,modeling_tf_cvt.py,modeling_clip.py,modeling_flax_clip.py,modeling_tf_clip.py,modeling_longformer.py,modeling_tf_longformer.py,modeling_siglip.py,modeling_clap.py,modeling_git.py

* Changed type hinting in these 12 scripts modeling_dpr.py,modeling_nat.py,idefics/vision.py,modeling_tf_dpr.py,modeling_luke.py,modeling_swin.py,modeling_tf_swin.py,modeling_blip.py,modeling_tf_blip.py,modeling_donut_swin.py,modeling_dinat.py,modeling_swinv2.py

* test fail update

* Removed the myvenv file

* Fixed type hinting for these 8 scripts modeling_tvlt.py,modeling_sam.py,modeling_tf_sam.py,modeling_tvp.py,modeling_rag.py,modeling_tf_rag.py,modeling_tf_xlm.py,modeling_xlm.py
parent 738ec75c
This diff is collapsed.
......@@ -98,8 +98,8 @@ class BlipForConditionalGenerationModelOutput(ModelOutput):
logits: Optional[Tuple[torch.FloatTensor]] = None
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@property
def decoder_logits(self):
......@@ -140,8 +140,8 @@ class BlipTextVisionModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -181,9 +181,9 @@ class BlipImageTextMatchingModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
vision_pooler_output: Optional[torch.FloatTensor] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
question_embeds: Optional[Tuple[torch.FloatTensor]] = None
......
......@@ -108,8 +108,8 @@ class TFBlipForConditionalGenerationModelOutput(ModelOutput):
logits: Tuple[tf.Tensor] | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
@property
def decoder_logits(self):
......@@ -150,8 +150,8 @@ class TFBlipTextVisionModelOutput(ModelOutput):
loss: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -191,9 +191,9 @@ class TFBlipImageTextMatchingModelOutput(ModelOutput):
loss: tf.Tensor | None = None
image_embeds: tf.Tensor | None = None
last_hidden_state: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
vision_pooler_output: tf.Tensor | None = None
attentions: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
question_embeds: Tuple[tf.Tensor] | None = None
......
......@@ -159,8 +159,8 @@ class ClapTextModelOutput(ModelOutput):
text_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -188,8 +188,8 @@ class ClapAudioModelOutput(ModelOutput):
audio_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......
......@@ -83,8 +83,8 @@ class CLIPVisionModelOutput(ModelOutput):
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -112,8 +112,8 @@ class CLIPTextModelOutput(ModelOutput):
text_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......
......@@ -182,8 +182,8 @@ class FlaxCLIPTextModelOutput(ModelOutput):
text_embeds: jnp.ndarray = None
last_hidden_state: jnp.ndarray = None
hidden_states: Optional[Tuple[jnp.ndarray]] = None
attentions: Optional[Tuple[jnp.ndarray]] = None
hidden_states: Optional[Tuple[jnp.ndarray, ...]] = None
attentions: Optional[Tuple[jnp.ndarray, ...]] = None
@flax.struct.dataclass
......
......@@ -74,7 +74,7 @@ class BaseModelOutputWithCLSToken(ModelOutput):
last_hidden_state: torch.FloatTensor = None
cls_token_value: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
# Copied from transformers.models.beit.modeling_beit.drop_path
......
......@@ -77,7 +77,7 @@ class TFBaseModelOutputWithCLSToken(ModelOutput):
last_hidden_state: tf.Tensor = None
cls_token_value: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
class TFCvtDropPath(tf.keras.layers.Layer):
......
......@@ -105,9 +105,9 @@ class DinatEncoderOutput(ModelOutput):
"""
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -142,9 +142,9 @@ class DinatModelOutput(ModelOutput):
last_hidden_state: torch.FloatTensor = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -179,9 +179,9 @@ class DinatImageClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
# Copied from transformers.models.nat.modeling_nat.NatEmbeddings with Nat->Dinat
......
......@@ -83,9 +83,9 @@ class DonutSwinEncoderOutput(ModelOutput):
"""
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -120,9 +120,9 @@ class DonutSwinModelOutput(ModelOutput):
last_hidden_state: torch.FloatTensor = None
pooler_output: Optional[torch.FloatTensor] = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
# Copied from transformers.models.swin.modeling_swin.window_partition
......
......@@ -82,8 +82,8 @@ class DPRContextEncoderOutput(ModelOutput):
"""
pooler_output: torch.FloatTensor
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -110,8 +110,8 @@ class DPRQuestionEncoderOutput(ModelOutput):
"""
pooler_output: torch.FloatTensor
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -143,8 +143,8 @@ class DPRReaderOutput(ModelOutput):
start_logits: torch.FloatTensor
end_logits: torch.FloatTensor = None
relevance_logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
class DPRPreTrainedModel(PreTrainedModel):
......
......@@ -82,8 +82,8 @@ class TFDPRContextEncoderOutput(ModelOutput):
"""
pooler_output: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -110,8 +110,8 @@ class TFDPRQuestionEncoderOutput(ModelOutput):
"""
pooler_output: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -143,8 +143,8 @@ class TFDPRReaderOutput(ModelOutput):
start_logits: tf.Tensor = None
end_logits: tf.Tensor = None
relevance_logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
class TFDPREncoderLayer(tf.keras.layers.Layer):
......
......@@ -76,7 +76,7 @@ class BaseModelOutputWithIntermediateActivations(ModelOutput):
"""
last_hidden_states: torch.FloatTensor = None
intermediate_activations: Optional[Tuple[torch.FloatTensor]] = None
intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -110,9 +110,9 @@ class BaseModelOutputWithPoolingAndIntermediateActivations(ModelOutput):
last_hidden_state: torch.FloatTensor = None
pooler_output: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
intermediate_activations: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
intermediate_activations: Optional[Tuple[torch.FloatTensor, ...]] = None
class DPTViTHybridEmbeddings(nn.Module):
......
......@@ -77,8 +77,8 @@ class GitVisionModelOutput(ModelOutput):
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
class GitEmbeddings(nn.Module):
......
......@@ -57,8 +57,8 @@ class IdeficsVisionModelOutput(ModelOutput):
image_embeds: Optional[torch.FloatTensor] = None
last_hidden_state: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
# Adapted from transformers.models.clip.modeling_clip.CLIPVisionEmbeddings
......
......@@ -1191,9 +1191,9 @@ class LEDEncoderBaseModelOutput(ModelOutput):
"""
last_hidden_state: torch.FloatTensor
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -1255,13 +1255,13 @@ class LEDSeq2SeqModelOutput(ModelOutput):
last_hidden_state: torch.FloatTensor = None
past_key_values: Optional[List[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_last_hidden_state: Optional[torch.FloatTensor] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -1322,13 +1322,13 @@ class LEDSeq2SeqLMOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
past_key_values: Optional[List[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_last_hidden_state: Optional[torch.FloatTensor] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -1389,13 +1389,13 @@ class LEDSeq2SeqSequenceClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
past_key_values: Optional[List[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_last_hidden_state: Optional[torch.FloatTensor] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -1459,13 +1459,13 @@ class LEDSeq2SeqQuestionAnsweringModelOutput(ModelOutput):
start_logits: torch.FloatTensor = None
end_logits: torch.FloatTensor = None
past_key_values: Optional[List[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor]] = None
decoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
decoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
cross_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_last_hidden_state: Optional[torch.FloatTensor] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor]] = None
encoder_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
encoder_global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
LED_START_DOCSTRING = r"""
......
......@@ -1471,9 +1471,9 @@ class TFLEDEncoderBaseModelOutput(ModelOutput):
"""
last_hidden_state: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -1535,13 +1535,13 @@ class TFLEDSeq2SeqModelOutput(ModelOutput):
last_hidden_state: tf.Tensor = None
past_key_values: List[tf.Tensor] | None = None
decoder_hidden_states: Tuple[tf.Tensor] | None = None
decoder_attentions: Tuple[tf.Tensor] | None = None
cross_attentions: Tuple[tf.Tensor] | None = None
decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
decoder_attentions: Tuple[tf.Tensor, ...] | None = None
cross_attentions: Tuple[tf.Tensor, ...] | None = None
encoder_last_hidden_state: tf.Tensor | None = None
encoder_hidden_states: Tuple[tf.Tensor] | None = None
encoder_attentions: Tuple[tf.Tensor] | None = None
encoder_global_attentions: Tuple[tf.Tensor] | None = None
encoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
encoder_attentions: Tuple[tf.Tensor, ...] | None = None
encoder_global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -1602,13 +1602,13 @@ class TFLEDSeq2SeqLMOutput(ModelOutput):
loss: tf.Tensor | None = None
logits: tf.Tensor = None
past_key_values: List[tf.Tensor] | None = None
decoder_hidden_states: Tuple[tf.Tensor] | None = None
decoder_attentions: Tuple[tf.Tensor] | None = None
cross_attentions: Tuple[tf.Tensor] | None = None
decoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
decoder_attentions: Tuple[tf.Tensor, ...] | None = None
cross_attentions: Tuple[tf.Tensor, ...] | None = None
encoder_last_hidden_state: tf.Tensor | None = None
encoder_hidden_states: Tuple[tf.Tensor] | None = None
encoder_attentions: Tuple[tf.Tensor] | None = None
encoder_global_attentions: Tuple[tf.Tensor] | None = None
encoder_hidden_states: Tuple[tf.Tensor, ...] | None = None
encoder_attentions: Tuple[tf.Tensor, ...] | None = None
encoder_global_attentions: Tuple[tf.Tensor, ...] | None = None
LED_START_DOCSTRING = r"""
......
......@@ -90,9 +90,9 @@ class LongformerBaseModelOutput(ModelOutput):
"""
last_hidden_state: torch.FloatTensor
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -138,9 +138,9 @@ class LongformerBaseModelOutputWithPooling(ModelOutput):
last_hidden_state: torch.FloatTensor
pooler_output: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -184,9 +184,9 @@ class LongformerMaskedLMOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -233,9 +233,9 @@ class LongformerQuestionAnsweringModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
start_logits: torch.FloatTensor = None
end_logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -279,9 +279,9 @@ class LongformerSequenceClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -327,9 +327,9 @@ class LongformerMultipleChoiceModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -373,9 +373,9 @@ class LongformerTokenClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
global_attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
global_attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
def _get_question_end_index(input_ids, sep_token_id):
......
......@@ -103,9 +103,9 @@ class TFLongformerBaseModelOutput(ModelOutput):
"""
last_hidden_state: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -151,9 +151,9 @@ class TFLongformerBaseModelOutputWithPooling(ModelOutput):
last_hidden_state: tf.Tensor = None
pooler_output: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -197,9 +197,9 @@ class TFLongformerMaskedLMOutput(ModelOutput):
loss: tf.Tensor | None = None
logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -246,9 +246,9 @@ class TFLongformerQuestionAnsweringModelOutput(ModelOutput):
loss: tf.Tensor | None = None
start_logits: tf.Tensor = None
end_logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -292,9 +292,9 @@ class TFLongformerSequenceClassifierOutput(ModelOutput):
loss: tf.Tensor | None = None
logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -340,9 +340,9 @@ class TFLongformerMultipleChoiceModelOutput(ModelOutput):
loss: tf.Tensor | None = None
logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
@dataclass
......@@ -386,9 +386,9 @@ class TFLongformerTokenClassifierOutput(ModelOutput):
loss: tf.Tensor | None = None
logits: tf.Tensor = None
hidden_states: Tuple[tf.Tensor] | None = None
attentions: Tuple[tf.Tensor] | None = None
global_attentions: Tuple[tf.Tensor] | None = None
hidden_states: Tuple[tf.Tensor, ...] | None = None
attentions: Tuple[tf.Tensor, ...] | None = None
global_attentions: Tuple[tf.Tensor, ...] | None = None
def _compute_global_attention_mask(input_ids_shape, sep_token_indices, before_sep_token=True):
......
......@@ -78,7 +78,7 @@ class BaseLukeModelOutputWithPooling(BaseModelOutputWithPooling):
"""
entity_last_hidden_state: torch.FloatTensor = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -109,7 +109,7 @@ class BaseLukeModelOutput(BaseModelOutput):
"""
entity_last_hidden_state: torch.FloatTensor = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -151,8 +151,8 @@ class LukeMaskedLMOutput(ModelOutput):
logits: torch.FloatTensor = None
entity_logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -181,9 +181,9 @@ class EntityClassificationOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -212,9 +212,9 @@ class EntityPairClassificationOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -243,9 +243,9 @@ class EntitySpanClassificationOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -277,9 +277,9 @@ class LukeSequenceClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -311,9 +311,9 @@ class LukeTokenClassifierOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -348,9 +348,9 @@ class LukeQuestionAnsweringModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
start_logits: torch.FloatTensor = None
end_logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
@dataclass
......@@ -384,9 +384,9 @@ class LukeMultipleChoiceModelOutput(ModelOutput):
loss: Optional[torch.FloatTensor] = None
logits: torch.FloatTensor = None
hidden_states: Optional[Tuple[torch.FloatTensor]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
attentions: Optional[Tuple[torch.FloatTensor]] = None
hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
entity_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None
attentions: Optional[Tuple[torch.FloatTensor, ...]] = None
class LukeEmbeddings(nn.Module):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment