Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
b29fd697
Unverified
Commit
b29fd697
authored
Mar 28, 2023
by
Joao Gante
Committed by
GitHub
Mar 28, 2023
Browse files
MBart: Fix docs and doctests (#22422)
Fix docs and doctests
parent
ae5fc2db
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing 3 changed files with 26 additions and 17 deletions
+26
-17
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mbart/modeling_mbart.py
+3
-2
src/transformers/models/mbart/modeling_tf_mbart.py
src/transformers/models/mbart/modeling_tf_mbart.py
+21
-14
utils/documentation_tests.txt
utils/documentation_tests.txt
+2
-1
No files found.
src/transformers/models/mbart/modeling_mbart.py
View file @
b29fd697
...
...
@@ -561,7 +561,7 @@ MBART_GENERATION_EXAMPLE = r"""
>>> inputs = tokenizer(example_english_phrase, return_tensors="pt")
>>> # Translate
- >>> generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
+ >>> generated_ids = model.generate(**inputs, num_beams=4, max_length=5)
>>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
'42 este răspuns'
```
...
...
@@ -1266,7 +1266,8 @@ class MBartModel(MBartPreTrainedModel):
  @add_start_docstrings(
- "The MBART Model with a language modeling head. Can be used for summarization.", MBART_START_DOCSTRING
+ "The MBART Model with a language modeling head. Can be used for summarization, after fine-tuning the pretrained models.",
+ MBART_START_DOCSTRING,
  )
  class MBartForConditionalGeneration(MBartPreTrainedModel):
  base_model_prefix = "model"
...
...
src/transformers/models/mbart/modeling_tf_mbart.py
View file @
b29fd697
...
...
@@ -619,37 +619,44 @@ MBART_INPUTS_DOCSTRING = r"""
"""
  MBART_GENERATION_EXAMPLE = r"""
- Summarization example:
+ Translation example:
  ```python
- >>> from transformers import AutoTokenizer, TFMBartForConditionalGeneration, MBartConfig
+ >>> from transformers import AutoTokenizer, TFMBartForConditionalGeneration
- >>> model = TFMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
- >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
+ >>> model = TFMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro")
+ >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-en-ro")
- >>> ARTICLE_TO_SUMMARIZE = "Meine Freunde sind cool, aber sie essen zu viel Kuchen."
- >>> inputs = tokenizer([ARTICLE_TO_SUMMARIZE], max_length=1024, return_tensors="tf")
+ >>> example_english_phrase = "42 is the answer"
+ >>> inputs = tokenizer(example_english_phrase, return_tensors="tf")
- >>> # Generate Summary
- >>> summary_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=5)
- >>> print(tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False))
+ >>> # Translate
+ >>> generated_ids = model.generate(**inputs, num_beams=4, max_length=5)
+ >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+ '42 este răspuns'
  ```
Mask filling example:
```python
>>> from transformers import AutoTokenizer, TFMBartForConditionalGeneration
>>> import tensorflow as tf
- >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
+ >>> model = TFMBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
>>> # de_DE is the language symbol id <LID> for German
>>> TXT = "</s> Meine Freunde sind <mask> nett aber sie essen zu viel Kuchen. </s> de_DE"
- >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="tf")["input_ids"]
+ >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="tf")["input_ids"]
  >>> logits = model(input_ids).logits
- >>> probs = tf.nn.softmax(logits[0])
- >>> # probs[5] is associated with the mask token
+ >>> masked_index = tf.where(input_ids[0] == tokenizer.mask_token_id)[0, 0]
+ >>> probs = tf.nn.softmax(logits[0, masked_index], axis=0)
+ >>> values, predictions = tf.math.top_k(probs, 5)
+ >>> tokenizer.decode(predictions).split()
+ ['nett', 'sehr', 'ganz', 'nicht', 'so']
```
"""
...
...
@@ -1299,7 +1306,7 @@ class BiasLayer(tf.keras.layers.Layer):
  @add_start_docstrings(
- "The MBART Model with a language modeling head. Can be used for summarization.",
+ "The MBART Model with a language modeling head. Can be used for summarization, after fine-tuning the pretrained models.",
  MBART_START_DOCSTRING,
  )
  class TFMBartForConditionalGeneration(TFMBartPreTrainedModel, TFCausalLanguageModelingLoss):
...
...
utils/documentation_tests.txt
View file @
b29fd697
...
...
@@ -120,6 +120,7 @@ src/transformers/models/maskformer/configuration_maskformer.py
src/transformers/models/maskformer/modeling_maskformer.py
src/transformers/models/mbart/configuration_mbart.py
src/transformers/models/mbart/modeling_mbart.py
src/transformers/models/mbart/modeling_tf_mbart.py
src/transformers/models/mctct/configuration_mctct.py
src/transformers/models/megatron_bert/configuration_megatron_bert.py
src/transformers/models/mobilebert/configuration_mobilebert.py
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment