chenpangpang / transformers · Commits

Unverified commit 03835af7, authored Oct 05, 2020 by Sylvain Gugger, committed via GitHub on Oct 05, 2020

Documentation fixes (#7585)
parent 9cf7b23b

Showing 5 changed files with 39 additions and 25 deletions (+39 -25)
src/transformers/configuration_utils.py   +2  -2
src/transformers/file_utils.py            +34 -23
src/transformers/modeling_funnel.py       +1  -0
src/transformers/modeling_roberta.py      +1  -0
src/transformers/modeling_xlm.py          +1  -0
src/transformers/configuration_utils.py
...
...
@@ -114,8 +114,8 @@ class PretrainedConfig(object):
           model pretrained weights.
         - **finetuning_task** (:obj:`str`, `optional`) -- Name of the task used to fine-tune the model. This can be
           used when converting from an original (TensorFlow or PyTorch) checkpoint.
-        - **id2label** (:obj:`List[str]`, `optional`) -- A map from index (for instance prediction index, or target
+        - **id2label** (:obj:`Dict[int, str]`, `optional`) -- A map from index (for instance prediction index, or target
           index) to label.
         - **label2id** (:obj:`Dict[str, int]`, `optional`) -- A map from label to index for the model.
         - **num_labels** (:obj:`int`, `optional`) -- Number of labels to use in the last layer added to the model,
           typically for a classification task.
...
...
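The corrected type matters in practice: `id2label` maps integer class indices to label strings, and `label2id` is its inverse. A minimal sketch of setting both on a config; the label names and the use of the bare `PretrainedConfig` are just for illustration, not part of this diff:

>>> from transformers import PretrainedConfig
>>> config = PretrainedConfig(id2label={0: "NEGATIVE", 1: "POSITIVE"}, label2id={"NEGATIVE": 0, "POSITIVE": 1})
>>> config.id2label[1]
'POSITIVE'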
src/transformers/file_utils.py
...
...
@@ -455,11 +455,12 @@ PT_MASKED_LM_SAMPLE = r"""
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
     >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
-    >>> input_ids = tokenizer("Hello, my dog is cute", return_tensors="pt")["input_ids"]
+    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
+    >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
-    >>> outputs = model(input_ids, labels=input_ids)
+    >>> outputs = model(**inputs, labels=labels)
     >>> loss = outputs.loss
-    >>> prediction_logits = outputs.logits
+    >>> logits = outputs.logits
 """

 PT_BASE_MODEL_SAMPLE = r"""
...
...
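For a concrete sense of the updated PyTorch masked-LM sample, here is roughly what it renders to for RoBERTa, whose decorator below sets checkpoint="roberta-base" and mask="<mask>"; the RobertaTokenizer/RobertaForMaskedLM class names are my illustration, not part of this diff:

>>> from transformers import RobertaTokenizer, RobertaForMaskedLM
>>> tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
>>> model = RobertaForMaskedLM.from_pretrained('roberta-base', return_dict=True)
>>> inputs = tokenizer("The capital of France is <mask>.", return_tensors="pt")
>>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
>>> outputs = model(**inputs, labels=labels)
>>> loss = outputs.loss
>>> logits = outputs.logits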
@@ -521,14 +522,15 @@ TF_TOKEN_CLASSIFICATION_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
     >>> input_ids = inputs["input_ids"]
     >>> inputs["labels"] = tf.reshape(tf.constant([1] * tf.size(input_ids).numpy()), (-1, tf.size(input_ids))) # Batch size 1
     >>> outputs = model(inputs)
-    >>> loss, scores = outputs[:2]
+    >>> loss = outputs.loss
+    >>> logits = outputs.logits
 """

 TF_QUESTION_ANSWERING_SAMPLE = r"""
...
...
@@ -538,14 +540,16 @@ TF_QUESTION_ANSWERING_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
     >>> input_dict = tokenizer(question, text, return_tensors='tf')
-    >>> start_scores, end_scores = model(input_dict)
+    >>> outputs = model(input_dict)
+    >>> start_logits = outputs.start_logits
+    >>> end_logits = outputs.end_logits
     >>> all_tokens = tokenizer.convert_ids_to_tokens(input_dict["input_ids"].numpy()[0])
-    >>> answer = ' '.join(all_tokens[tf.math.argmax(start_scores, 1)[0] : tf.math.argmax(end_scores, 1)[0]+1])
+    >>> answer = ' '.join(all_tokens[tf.math.argmax(start_logits, 1)[0] : tf.math.argmax(end_logits, 1)[0]+1])
 """

 TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
...
...
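The last two lines of this sample boil down to taking the argmax of the start and end logits and slicing the token list with those indices. A standalone sketch of just that step, with made-up tokens and logits (not part of this diff):

>>> import tensorflow as tf
>>> all_tokens = ['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', 'henson', 'was', 'a', 'nice', 'puppet', '[SEP]']
>>> start_logits = tf.constant([[0.1] * 7 + [9.0] + [0.1] * 6])   # peaks at the second "jim" (index 7)
>>> end_logits = tf.constant([[0.1] * 12 + [9.0, 0.1]])           # peaks at "puppet" (index 12)
>>> start = int(tf.math.argmax(start_logits, 1)[0])
>>> end = int(tf.math.argmax(end_logits, 1)[0])
>>> ' '.join(all_tokens[start : end + 1])
'jim henson was a nice puppet'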
@@ -555,13 +559,14 @@ TF_SEQUENCE_CLASSIFICATION_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
     >>> inputs["labels"] = tf.reshape(tf.constant(1), (-1, 1)) # Batch size 1
     >>> outputs = model(inputs)
-    >>> loss, logits = outputs[:2]
+    >>> loss = outputs.loss
+    >>> logits = outputs.logits
 """

 TF_MASKED_LM_SAMPLE = r"""
...
...
@@ -571,12 +576,14 @@ TF_MASKED_LM_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
-    >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
+    >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="tf")
+    >>> inputs["labels"] = tokenizer("The capital of France is Paris.", return_tensors="tf")["input_ids"]
-    >>> outputs = model(input_ids)
-    >>> prediction_scores = outputs[0]
+    >>> outputs = model(inputs)
+    >>> loss = outputs.loss
+    >>> logits = outputs.logits
 """

 TF_BASE_MODEL_SAMPLE = r"""
...
...
@@ -586,12 +593,12 @@ TF_BASE_MODEL_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
     >>> outputs = model(inputs)
-    >>> last_hidden_states = outputs[0]  # The last hidden-state is the first element of the output tuple
+    >>> last_hidden_states = outputs.last_hidden_states
 """

 TF_MULTIPLE_CHOICE_SAMPLE = r"""
...
...
@@ -601,7 +608,7 @@ TF_MULTIPLE_CHOICE_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
     >>> choice0 = "It is eaten with a fork and a knife."
...
...
@@ -612,7 +619,7 @@ TF_MULTIPLE_CHOICE_SAMPLE = r"""
     >>> outputs = model(inputs)  # batch size is 1
     >>> # the linear classifier still needs to be trained
-    >>> logits = outputs[0]
+    >>> logits = outputs.logits
 """

 TF_CAUSAL_LM_SAMPLE = r"""
...
...
@@ -622,18 +629,21 @@ TF_CAUSAL_LM_SAMPLE = r"""
     >>> import tensorflow as tf
     >>> tokenizer = {tokenizer_class}.from_pretrained('{checkpoint}')
-    >>> model = {model_class}.from_pretrained('{checkpoint}')
+    >>> model = {model_class}.from_pretrained('{checkpoint}', return_dict=True)
     >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
     >>> outputs = model(inputs)
-    >>> logits = outputs[0]
+    >>> logits = outputs.logits
 """


-def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None, output_type=None, config_class=None):
+def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None, output_type=None, config_class=None, mask=None):
     def docstring_decorator(fn):
         model_class = fn.__qualname__.split(".")[0]
         is_tf_class = model_class[:2] == "TF"
+        doc_kwargs = dict(model_class=model_class, tokenizer_class=tokenizer_class, checkpoint=checkpoint)

         if "SequenceClassification" in model_class:
             code_sample = TF_SEQUENCE_CLASSIFICATION_SAMPLE if is_tf_class else PT_SEQUENCE_CLASSIFICATION_SAMPLE
...
...
@@ -643,7 +653,8 @@ def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None, o
             code_sample = TF_TOKEN_CLASSIFICATION_SAMPLE if is_tf_class else PT_TOKEN_CLASSIFICATION_SAMPLE
         elif "MultipleChoice" in model_class:
             code_sample = TF_MULTIPLE_CHOICE_SAMPLE if is_tf_class else PT_MULTIPLE_CHOICE_SAMPLE
-        elif "MaskedLM" in model_class:
+        elif "MaskedLM" in model_class or model_class in ["FlaubertWithLMHeadModel", "XLMWithLMHeadModel"]:
+            doc_kwargs["mask"] = "[MASK]" if mask is None else mask
             code_sample = TF_MASKED_LM_SAMPLE if is_tf_class else PT_MASKED_LM_SAMPLE
         elif "LMHead" in model_class:
             code_sample = TF_CAUSAL_LM_SAMPLE if is_tf_class else PT_CAUSAL_LM_SAMPLE
...
...
@@ -653,7 +664,7 @@ def add_code_sample_docstrings(*docstr, tokenizer_class=None, checkpoint=None, o
raise
ValueError
(
f
"Docstring can't be built for model
{
model_class
}
"
)
output_doc
=
_prepare_output_docstrings
(
output_type
,
config_class
)
if
output_type
is
not
None
else
""
built_doc
=
code_sample
.
format
(
model_class
=
model_class
,
tokenizer_class
=
tokenizer_class
,
checkpoint
=
checkpoint
)
built_doc
=
code_sample
.
format
(
**
doc_kwargs
)
fn
.
__doc__
=
(
fn
.
__doc__
or
""
)
+
""
.
join
(
docstr
)
+
output_doc
+
built_doc
return
fn
...
...
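Taken together, the file_utils.py changes mean the decorator now collects its format fields in `doc_kwargs` and only adds a `mask` entry for masked-LM-style classes, defaulting to `[MASK]` when no `mask` argument is given. A stripped-down, self-contained sketch of that flow; the `build_doc` helper and the BERT names are illustrative, not library code:

>>> SAMPLE = 'inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")'
>>> def build_doc(model_class, tokenizer_class, checkpoint, mask=None):
...     doc_kwargs = dict(model_class=model_class, tokenizer_class=tokenizer_class, checkpoint=checkpoint)
...     doc_kwargs["mask"] = "[MASK]" if mask is None else mask   # mirrors the new MaskedLM branch
...     return SAMPLE.format(**doc_kwargs)
>>> print(build_doc("RobertaForMaskedLM", "RobertaTokenizer", "roberta-base", mask="<mask>"))
inputs = tokenizer("The capital of France is <mask>.", return_tensors="pt")
>>> print(build_doc("BertForMaskedLM", "BertTokenizer", "bert-base-uncased"))
inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")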
src/transformers/modeling_funnel.py
...
...
@@ -1169,6 +1169,7 @@ class FunnelForMaskedLM(FunnelPreTrainedModel):
         checkpoint="funnel-transformer/small",
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="<mask>",
     )
     def forward(
         self,
...
...
src/transformers/modeling_roberta.py
...
...
@@ -847,6 +847,7 @@ class RobertaForMaskedLM(RobertaPreTrainedModel):
         checkpoint="roberta-base",
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="<mask>",
     )
     def forward(
         self,
...
...
src/transformers/modeling_xlm.py
...
...
@@ -706,6 +706,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
         checkpoint="xlm-mlm-en-2048",
         output_type=MaskedLMOutput,
         config_class=_CONFIG_FOR_DOC,
+        mask="<special1>",
     )
     def forward(
         self,
...
...
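With mask="<special1>" wired into the decorator above, the generated XLM masked-LM example would read roughly as follows; the XLMTokenizer class name is my assumption, while the checkpoint and mask token come from this diff:

>>> from transformers import XLMTokenizer, XLMWithLMHeadModel
>>> tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
>>> model = XLMWithLMHeadModel.from_pretrained('xlm-mlm-en-2048', return_dict=True)
>>> inputs = tokenizer("The capital of France is <special1>.", return_tensors="pt")   # {mask} -> <special1>
>>> outputs = model(**inputs, labels=tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"])
>>> loss, logits = outputs.loss, outputs.logits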