"vscode:/vscode.git/clone" did not exist on "6f257bb3c241eabf7eb970240a92fdbf6ff2de49"
Unverified Commit 38ddab10 authored by Karim Foda, committed by GitHub

Doctest longformer (#16441)



* Add initial docstring changes

* make fixup

* Add TF doc changes

* fix seq classifier output

* fix quality errors

* t

* switch head to random init

* Fix expected outputs

* Update src/transformers/models/longformer/modeling_longformer.py
Co-authored-by: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
parent 10704e12
src/transformers/models/longformer/modeling_longformer.py

@@ -1782,23 +1782,31 @@ class LongformerForMaskedLM(LongformerPreTrainedModel):
         Returns:

-        Examples:
+        Mask filling example:

         ```python
-        >>> import torch
-        >>> from transformers import LongformerForMaskedLM, LongformerTokenizer
+        >>> from transformers import LongformerTokenizer, LongformerForMaskedLM

-        >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
         >>> tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
+        >>> model = LongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
+        ```

-        >>> SAMPLE_TEXT = " ".join(["Hello world! "] * 1000)  # long input document
-        >>> input_ids = torch.tensor(tokenizer.encode(SAMPLE_TEXT)).unsqueeze(0)  # batch of size 1
+        Let's try a very long input.

-        >>> attention_mask = None  # default is local attention everywhere, which is a good choice for MaskedLM
-        >>> # check `LongformerModel.forward` for more details how to set *attention_mask*
-        >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
-        >>> loss = outputs.loss
-        >>> prediction_logits = outputs.logits
+        ```python
+        >>> TXT = (
+        ...     "My friends are <mask> but they eat too many carbs."
+        ...     + " That's why I decide not to eat with them." * 300
+        ... )
+        >>> input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]
+        >>> logits = model(input_ids).logits
+
+        >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
+        >>> probs = logits[0, masked_index].softmax(dim=0)
+        >>> values, predictions = probs.topk(5)
+
+        >>> tokenizer.decode(predictions).split()
+        ['healthy', 'skinny', 'thin', 'good', 'vegetarian']
         ```"""
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
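Side note on the new example above: the filler sentence is repeated 300 times precisely so the input overflows a standard 512-token window and actually exercises Longformer's long-input path. Below is a minimal sanity check (not part of this diff; it only reuses the checkpoint and text from the hunk above) that the encoded length lands between 512 and the model's 4,096-token limit:

```python
# Illustrative sanity check: confirm the doctest's input really is "long".
from transformers import LongformerTokenizer

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")

TXT = (
    "My friends are <mask> but they eat too many carbs."
    + " That's why I decide not to eat with them." * 300
)
input_ids = tokenizer([TXT], return_tensors="pt")["input_ids"]

# A few thousand tokens: well past a 512-token window, still under Longformer's 4096 cap.
print(input_ids.shape[1])
```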
@@ -1860,9 +1868,11 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="jpelhaw/longformer-base-plagiarism-detection",
         output_type=LongformerSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="'ORIGINAL'",
+        expected_loss=5.44,
     )
     def forward(
         self,
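For the sequence-classification head, the decorator now points the generated code sample at the `jpelhaw/longformer-base-plagiarism-detection` checkpoint and pins its expected label and loss. Roughly the kind of snippet the rendered docstring exercises (a sketch, not the exact template; the example sentence here is illustrative only):

```python
import torch
from transformers import LongformerForSequenceClassification, LongformerTokenizer

ckpt = "jpelhaw/longformer-base-plagiarism-detection"  # checkpoint named in the diff
tokenizer = LongformerTokenizer.from_pretrained(ckpt)
model = LongformerForSequenceClassification.from_pretrained(ckpt)

inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# Map the top logit back to a label string; for the template's own input the
# doctest expects 'ORIGINAL'.
predicted_class_id = int(logits.argmax(dim=-1))
print(model.config.id2label[predicted_class_id])
```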
@@ -2127,9 +2137,11 @@ class LongformerForTokenClassification(LongformerPreTrainedModel):
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="brad1141/Longformer-finetuned-norm",
         output_type=LongformerTokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="['Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence', 'Evidence']",
+        expected_loss=0.63,
     )
     def forward(
         self,
...
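Same pattern for the token-classification head: the `brad1141/Longformer-finetuned-norm` checkpoint plus a pinned per-token label sequence and loss. A sketch of how such a per-token prediction is produced (the input sentence is illustrative, not taken from this diff):

```python
import torch
from transformers import LongformerForTokenClassification, LongformerTokenizer

ckpt = "brad1141/Longformer-finetuned-norm"  # checkpoint named in the diff
tokenizer = LongformerTokenizer.from_pretrained(ckpt)
model = LongformerForTokenClassification.from_pretrained(ckpt)

inputs = tokenizer("HuggingFace is a company based in Paris and New York", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

# One label per input token; the diff pins a run of 'Evidence' labels for the template input.
predicted_ids = logits.argmax(dim=-1)[0]
print([model.config.id2label[int(i)] for i in predicted_ids])
```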
src/transformers/models/longformer/modeling_tf_longformer.py

@@ -2102,10 +2102,12 @@ class TFLongformerForMaskedLM(TFLongformerPreTrainedModel, TFMaskedLanguageModelingLoss):
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="allenai/longformer-base-4096",
         output_type=TFLongformerMaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
         mask="<mask>",
+        expected_output="' Paris'",
+        expected_loss=0.44,
     )
     def call(
         self,
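On the TensorFlow side, `TFLongformerForMaskedLM` gets the base `allenai/longformer-base-4096` checkpoint and an expected fill of `' Paris'`. A sketch of the corresponding usage (the capital-of-France prompt below is the usual fill-mask example and is assumed, not taken from this diff):

```python
import tensorflow as tf
from transformers import LongformerTokenizer, TFLongformerForMaskedLM

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = TFLongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")

inputs = tokenizer("The capital of France is <mask>.", return_tensors="tf")
logits = model(**inputs).logits

# Locate the <mask> position and take the highest-scoring token there.
mask_index = int(tf.where(inputs["input_ids"][0] == tokenizer.mask_token_id)[0, 0])
predicted_id = int(tf.math.argmax(logits[0, mask_index]))
print(tokenizer.decode([predicted_id]))  # the diff's expected_output is "' Paris'"
```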
@@ -2198,6 +2200,8 @@ class TFLongformerForQuestionAnswering(TFLongformerPreTrainedModel, TFQuestionAnsweringLoss):
         checkpoint="allenai/longformer-large-4096-finetuned-triviaqa",
         output_type=TFLongformerQuestionAnsweringModelOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="' puppet'",
+        expected_loss=0.96,
     )
     def call(
         self,
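The question-answering head already pointed at the TriviaQA-finetuned checkpoint; this change only pins the expected answer span and loss. A sketch of extracting that span (the question/context pair is the stock Jim Henson example, assumed here for illustration):

```python
import tensorflow as tf
from transformers import LongformerTokenizer, TFLongformerForQuestionAnswering

ckpt = "allenai/longformer-large-4096-finetuned-triviaqa"
tokenizer = LongformerTokenizer.from_pretrained(ckpt)
model = TFLongformerForQuestionAnswering.from_pretrained(ckpt)

inputs = tokenizer("Who was Jim Henson?", "Jim Henson was a nice puppet", return_tensors="tf")
outputs = model(**inputs)

# Pick the most likely start/end positions and decode the span between them.
start = int(tf.math.argmax(outputs.start_logits, axis=-1)[0])
end = int(tf.math.argmax(outputs.end_logits, axis=-1)[0])
print(tokenizer.decode(inputs["input_ids"][0, start : end + 1]))  # expected_output: "' puppet'"
```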
@@ -2344,9 +2348,11 @@ class TFLongformerForSequenceClassification(TFLongformerPreTrainedModel, TFSequenceClassificationLoss):
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="hf-internal-testing/tiny-random-longformer",
         output_type=TFLongformerSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="'LABEL_1'",
+        expected_loss=0.69,
     )
     def call(
         self,
@@ -2582,9 +2588,11 @@ class TFLongformerForTokenClassification(TFLongformerPreTrainedModel, TFTokenClassificationLoss):
     @add_start_docstrings_to_model_forward(LONGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
         processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="hf-internal-testing/tiny-random-longformer",
         output_type=TFLongformerTokenClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
+        expected_output="['LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1', 'LABEL_1']",
+        expected_loss=0.59,
     )
     def call(
         self,
...
@@ -31,6 +31,8 @@ src/transformers/models/glpn/modeling_glpn.py
 src/transformers/models/gpt2/modeling_gpt2.py
 src/transformers/models/gptj/modeling_gptj.py
 src/transformers/models/hubert/modeling_hubert.py
+src/transformers/models/longformer/modeling_longformer.py
+src/transformers/models/longformer/modeling_tf_longformer.py
 src/transformers/models/marian/modeling_marian.py
 src/transformers/models/mbart/modeling_mbart.py
 src/transformers/models/mobilebert/modeling_mobilebert.py
...
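Registering the two Longformer files in this list is what opts their docstring examples into the scheduled doctest run. Locally, something along these lines should exercise the same examples (a sketch; the CI job's exact invocation may differ):

```python
# Run the doc examples of the newly registered files with pytest's doctest collector.
import pytest

pytest.main(
    [
        "--doctest-modules",
        "src/transformers/models/longformer/modeling_longformer.py",
        "src/transformers/models/longformer/modeling_tf_longformer.py",
        "-sv",
        "--doctest-continue-on-failure",
    ]
)
```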