Unverified Commit 04a17f85 authored by Sylvain Gugger, committed by GitHub

Doc fixes in preparation for the docstyle PR (#8061)

* Fixes in preparation for doc styling

* More fixes

* Better syntax

* Fixes

* Style

* More fixes

* More fixes
parent 8bbb74f2
@@ -625,7 +625,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
        training=False,
    ):
        r"""
-        mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input)
+        mc_token_ids (:obj:`tf.Tensor` or :obj:`Numpy array` of shape :obj:`(batch_size, num_choices)`, `optional`, default to index of the last token of the input):
            Index of the classification token in each input sequence.
            Selected in the range ``[0, input_ids.size(-1) - 1]``.
......
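The `mc_token_ids` argument documented above picks, for each choice, the token whose hidden state feeds the multiple-choice head (by default the last token). A minimal sketch of building such indices, with made-up inputs and assuming padding id 0 (not part of the diff):

```python
import tensorflow as tf

# Hypothetical multiple-choice batch: 1 example, 2 choices, padded to length 5 with id 0.
input_ids = tf.constant([[[5, 6, 7, 0, 0],
                          [5, 6, 7, 8, 0]]])                      # shape (batch_size, num_choices, seq_len)

# mc_token_ids points at the last non-padding token of each choice,
# i.e. indices in [0, input_ids.shape[-1] - 1].
lengths = tf.reduce_sum(tf.cast(input_ids != 0, tf.int32), axis=-1)
mc_token_ids = lengths - 1                                        # shape (batch_size, num_choices)
print(mc_token_ids.numpy())                                       # [[2 3]]
```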
@@ -111,8 +111,11 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer):
        """Replace non-padding symbols with their position numbers. Position numbers begin at
        padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
        `utils.make_positions`.
-        :param tf.Tensor x:
-        :return tf.Tensor:
+
+        Args:
+            x: tf.Tensor
+
+        Returns: tf.Tensor
        """
        mask = tf.cast(tf.math.not_equal(x, self.padding_idx), dtype=tf.int32)
        incremental_indicies = tf.math.cumsum(mask, axis=1) * mask
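For reference, a minimal standalone sketch of the position-id computation shown above, assuming padding_idx = 1 and a made-up batch (not the model's full code path):

```python
import tensorflow as tf

padding_idx = 1                                           # assumed padding token id
x = tf.constant([[5, 6, 7, 1, 1]])                        # one sequence, padded with 1

mask = tf.cast(tf.math.not_equal(x, padding_idx), dtype=tf.int32)
incremental_indices = tf.math.cumsum(mask, axis=1) * mask
position_ids = incremental_indices + padding_idx          # padding keeps padding_idx, real tokens start at padding_idx + 1

print(position_ids.numpy())                               # [[2 3 4 1 1]]
```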
@@ -122,8 +125,11 @@ class TFRobertaEmbeddings(tf.keras.layers.Layer):
    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
        """We are provided embeddings directly. We cannot infer which are padded so just generate
        sequential position ids.
-        :param tf.Tensor inputs_embeds:
-        :return tf.Tensor:
+
+        Args:
+            inputs_embeds: tf.Tensor
+
+        Returns: tf.Tensor
        """
        seq_length = shape_list(inputs_embeds)[1]
        position_ids = tf.range(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=tf.int32)[tf.newaxis, :]
......
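Likewise for the embeddings-only branch: padding cannot be detected from `inputs_embeds`, so positions are simply sequential starting at `padding_idx + 1`. A small sketch with made-up shapes:

```python
import tensorflow as tf

padding_idx = 1
inputs_embeds = tf.zeros((2, 5, 8))                       # (batch_size, seq_length, hidden_size); values don't matter
seq_length = tf.shape(inputs_embeds)[1]

position_ids = tf.range(padding_idx + 1, seq_length + padding_idx + 1, dtype=tf.int32)[tf.newaxis, :]
print(position_ids.numpy())                               # [[2 3 4 5 6]]
```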
@@ -1718,120 +1718,3 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
# @add_start_docstrings("""XLNet Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear layers on top of
# the hidden-states output to compute `span start logits` and `span end logits`). """,
# XLNET_START_DOCSTRING, XLNET_INPUTS_DOCSTRING)
# class TFXLNetForQuestionAnswering(TFXLNetPreTrainedModel):
# r"""
# Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
# **start_top_log_probs**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided)
# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top)``
# Log probabilities for the top config.start_n_top start token possibilities (beam-search).
# **start_top_index**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided)
# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top)``
# Indices for the top config.start_n_top start token possibilities (beam-search).
# **end_top_log_probs**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided)
# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top * config.end_n_top)``
# Log probabilities for the top ``config.start_n_top * config.end_n_top`` end token possibilities (beam-search).
# **end_top_index**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided)
# ``tf.Tensor`` of shape ``(batch_size, config.start_n_top * config.end_n_top)``
# Indices for the top ``config.start_n_top * config.end_n_top`` end token possibilities (beam-search).
# **cls_logits**: (`optional`, returned if ``start_positions`` or ``end_positions`` is not provided)
# ``tf.Tensor`` of shape ``(batch_size,)``
# Log probabilities for the ``is_impossible`` label of the answers.
# **mems**:
# list of ``tf.Tensor`` (one for each layer):
# that contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
# if config.mem_len > 0 else tuple of None. Can be used to speed up sequential decoding and attend to longer context.
# See details in the docstring of the `mems` input above.
# **hidden_states**: (`optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``)
# list of ``tf.Tensor`` (one for the output of each layer + the output of the embeddings)
# of shape ``(batch_size, sequence_length, hidden_size)``:
# Hidden-states of the model at the output of each layer plus the initial embedding outputs.
# **attentions**: (`optional`, returned when ``output_attentions=True``)
# list of ``tf.Tensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
# Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
# Examples::
# # For example purposes. Not runnable.
# tokenizer = XLMTokenizer.from_pretrained('xlm-mlm-en-2048')
# model = XLMForQuestionAnswering.from_pretrained('xlnet-large-cased')
# input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute", add_special_tokens=True))[None, :] # Batch size 1
# start_positions = tf.constant([1])
# end_positions = tf.constant([3])
# outputs = model(input_ids, start_positions=start_positions, end_positions=end_positions)
# loss, start_scores, end_scores = outputs[:2]
# """
# def __init__(self, config, *inputs, **kwargs):
# super().__init__(config, *inputs, **kwargs)
# self.start_n_top = config.start_n_top
# self.end_n_top = config.end_n_top
# self.transformer = TFXLNetMainLayer(config, name='transformer')
# self.start_logits = TFPoolerStartLogits(config, name='start_logits')
# self.end_logits = TFPoolerEndLogits(config, name='end_logits')
# self.answer_class = TFPoolerAnswerClass(config, name='answer_class')
# def call(self, inputs, training=False):
# transformer_outputs = self.transformer(inputs, training=training)
# hidden_states = transformer_outputs[0]
# start_logits = self.start_logits(hidden_states, p_mask=p_mask)
# outputs = transformer_outputs[1:] # Keep mems, hidden states, attentions if there are in it
# if start_positions is not None and end_positions is not None:
# # If we are on multi-GPU, let's remove the dimension added by batch splitting
# for x in (start_positions, end_positions, cls_index, is_impossible):
# if x is not None and x.dim() > 1:
# x.squeeze_(-1)
# # during training, compute the end logits based on the ground truth of the start position
# end_logits = self.end_logits(hidden_states, start_positions=start_positions, p_mask=p_mask)
# loss_fct = CrossEntropyLoss()
# start_loss = loss_fct(start_logits, start_positions)
# end_loss = loss_fct(end_logits, end_positions)
# total_loss = (start_loss + end_loss) / 2
# if cls_index is not None and is_impossible is not None:
# # Predict answerability from the representation of CLS and START
# cls_logits = self.answer_class(hidden_states, start_positions=start_positions, cls_index=cls_index)
# loss_fct_cls = nn.BCEWithLogitsLoss()
# cls_loss = loss_fct_cls(cls_logits, is_impossible)
# # note(zhiliny): by default multiply the loss by 0.5 so that the scale is comparable to start_loss and end_loss
# total_loss += cls_loss * 0.5
# outputs = (total_loss,) + outputs
# else:
# # during inference, compute the end logits based on beam search
# bsz, slen, hsz = hidden_states.size()
# start_log_probs = F.softmax(start_logits, dim=-1) # shape (bsz, slen)
# start_top_log_probs, start_top_index = torch.topk(start_log_probs, self.start_n_top, dim=-1) # shape (bsz, start_n_top)
# start_top_index_exp = start_top_index.unsqueeze(-1).expand(-1, -1, hsz) # shape (bsz, start_n_top, hsz)
# start_states = torch.gather(hidden_states, -2, start_top_index_exp) # shape (bsz, start_n_top, hsz)
# start_states = start_states.unsqueeze(1).expand(-1, slen, -1, -1) # shape (bsz, slen, start_n_top, hsz)
# hidden_states_expanded = hidden_states.unsqueeze(2).expand_as(start_states) # shape (bsz, slen, start_n_top, hsz)
# p_mask = p_mask.unsqueeze(-1) if p_mask is not None else None
# end_logits = self.end_logits(hidden_states_expanded, start_states=start_states, p_mask=p_mask)
# end_log_probs = F.softmax(end_logits, dim=1) # shape (bsz, slen, start_n_top)
# end_top_log_probs, end_top_index = torch.topk(end_log_probs, self.end_n_top, dim=1) # shape (bsz, end_n_top, start_n_top)
# end_top_log_probs = end_top_log_probs.view(-1, self.start_n_top * self.end_n_top)
# end_top_index = end_top_index.view(-1, self.start_n_top * self.end_n_top)
# start_states = torch.einsum("blh,bl->bh", hidden_states, start_log_probs) # get the representation of START as weighted sum of hidden states
# cls_logits = self.answer_class(hidden_states, start_states=start_states, cls_index=cls_index) # Shape (batch size,): one single `cls_logits` for each sample
# outputs = (start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits) + outputs
# # return start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits
# # or (if labels are provided) (total_loss,)
# return outputs
@@ -1487,7 +1487,7 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
        return_dict=None,
    ):
        r"""
-        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`)
+        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss.
            Indices should be in ``[0, ..., config.num_labels - 1]``.
            If ``config.num_labels == 1`` a regression loss is computed (Mean-Square loss),
......
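As the docstring above notes, `config.num_labels` decides between classification and regression. A hedged, illustrative sketch of that rule (not the model's exact code path):

```python
import torch
from torch.nn import CrossEntropyLoss, MSELoss

num_labels = 3                                  # set to 1 to exercise the regression branch
logits = torch.randn(4, num_labels)             # (batch_size, num_labels), stand-in for model output
labels = torch.tensor([0, 2, 1, 0])             # class indices in [0, num_labels - 1]

if num_labels == 1:
    loss = MSELoss()(logits.view(-1), labels.float().view(-1))
else:
    loss = CrossEntropyLoss()(logits.view(-1, num_labels), labels.view(-1))
print(loss.item())
```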
@@ -352,22 +352,22 @@ class CaptureStd:
    - out - capture stdout: True/False, default True
    - err - capture stdout: True/False, default True

-    Examples:
+    Examples::

        with CaptureStdout() as cs:
            print("Secret message")
        print(f"captured: {cs.out}")

        import sys
        with CaptureStderr() as cs:
            print("Warning: ", file=sys.stderr)
        print(f"captured: {cs.err}")

        # to capture just one of the streams, but not the other
        with CaptureStd(err=False) as cs:
            print("Secret message")
        print(f"captured: {cs.out}")

        # but best use the stream-specific subclasses
    """
@@ -444,17 +444,17 @@ class CaptureLogger:
    Results:
        The captured output is available via `self.out`

-    Example:
+    Example::

        >>> from transformers import logging
        >>> from transformers.testing_utils import CaptureLogger
        >>> msg = "Testing 1, 2, 3"
        >>> logging.set_verbosity_info()
        >>> logger = logging.get_logger("transformers.tokenization_bart")
        >>> with CaptureLogger(logger) as cl:
        ...     logger.info(msg)
        >>> assert cl.out, msg+"\n"
    """

    def __init__(self, logger):
@@ -485,24 +485,36 @@ class TestCasePlus(unittest.TestCase):
    of test, unless `after=False`.

    # 1. create a unique temp dir, `tmp_dir` will contain the path to the created temp dir
-    def test_whatever(self):
-        tmp_dir = self.get_auto_remove_tmp_dir()
+
+    ::
+
+        def test_whatever(self):
+            tmp_dir = self.get_auto_remove_tmp_dir()

    # 2. create a temp dir of my choice and delete it at the end - useful for debug when you want to
    # monitor a specific directory
-    def test_whatever(self):
-        tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test")
+
+    ::
+
+        def test_whatever(self):
+            tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test")

    # 3. create a temp dir of my choice and do not delete it at the end - useful for when you want
    # to look at the temp results
-    def test_whatever(self):
-        tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False)
+
+    ::
+
+        def test_whatever(self):
+            tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", after=False)

    # 4. create a temp dir of my choice and ensure to delete it right away - useful for when you
    # disabled deletion in the previous test run and want to make sure the that tmp dir is empty
    # before the new test is run
-    def test_whatever(self):
-        tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", before=True)
+
+    ::
+
+        def test_whatever(self):
+            tmp_dir = self.get_auto_remove_tmp_dir(tmp_dir="./tmp/run/test", before=True)

    Note 1: In order to run the equivalent of `rm -r` safely, only subdirs of the
    project repository checkout are allowed if an explicit `tmp_dir` is used, so
......
@@ -488,6 +488,7 @@ domains and tasks. The basic logic is this:

# This particular element is used in a couple ways, so we define it
# with a name:
+# docstyle-ignore
EMOTICONS = r"""
    (?:
      [<>]?
@@ -505,7 +506,7 @@ EMOTICONS = r"""
# URL pattern due to John Gruber, modified by Tom Winzig. See
# https://gist.github.com/winzig/8894715

+# docstyle-ignore
URLS = r"""  # Capture 1: entire matched URL
  (?:
  https?:    # URL protocol and colon
@@ -549,6 +550,7 @@ URLS = r""" # Capture 1: entire matched URL
)
"""

+# docstyle-ignore
# The components of the tokenizer:
REGEXPS = (
    URLS,
@@ -628,18 +630,16 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8")
    Remove entities from text by converting them to their
    corresponding unicode character.

-    :param text: a unicode string or a byte string encoded in the given
-    `encoding` (which defaults to 'utf-8').
-
-    :param list keep: list of entity names which should not be replaced.\
-    This supports both numeric entities (``&#nnnn;`` and ``&#hhhh;``)
-    and named entities (such as ``&nbsp;`` or ``&gt;``).
-
-    :param bool remove_illegal: If `True`, entities that can't be converted are\
-    removed. Otherwise, entities that can't be converted are kept "as
-    is".
-
-    :returns: A unicode string with the entities removed.
+    Args:
+        text:
+            A unicode string or a byte string encoded in the given `encoding` (which defaults to 'utf-8').
+        keep (list):
+            List of entity names which should not be replaced. This supports both numeric entities (``&#nnnn;`` and ``&#hhhh;``)
+            and named entities (such as ``&nbsp;`` or ``&gt;``).
+        remove_illegal (bool):
+            If `True`, entities that can't be converted are removed. Otherwise, entities that can't be converted are kept "as is".
+
+    Returns: A unicode string with the entities removed.

    See https://github.com/scrapy/w3lib/blob/master/w3lib/html.py
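The standard library performs an analogous conversion, which may help picture the behaviour described above (this uses `html.unescape`, not the module's own helper):

```python
import html

# Named and numeric entities are converted to their unicode characters.
text = "Price: &pound;100 &gt; &#163;50"
print(html.unescape(text))   # Price: £100 > £50
```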
@@ -688,16 +688,16 @@ def _replace_html_entities(text, keep=(), remove_illegal=True, encoding="utf-8")
class TweetTokenizer:
    r"""
-    Tokenizer for tweets.
+    Examples::

+        >>> # Tokenizer for tweets.
        >>> from nltk.tokenize import TweetTokenizer
        >>> tknzr = TweetTokenizer()
        >>> s0 = "This is a cooool #dummysmiley: :-) :-P <3 and some arrows < > -> <--"
        >>> tknzr.tokenize(s0)
        ['This', 'is', 'a', 'cooool', '#dummysmiley', ':', ':-)', ':-P', '<3', 'and', 'some', 'arrows', '<', '>', '->', '<--']

-    Examples using `strip_handles` and `reduce_len parameters`:
+        >>> # Examples using `strip_handles` and `reduce_len parameters`:
        >>> tknzr = TweetTokenizer(strip_handles=True, reduce_len=True)
        >>> s1 = '@remy: This is waaaaayyyy too much for you!!!!!!'
        >>> tknzr.tokenize(s1)
@@ -711,10 +711,11 @@ class TweetTokenizer:
    def tokenize(self, text):
        """
-        :param text: str
-        :rtype: list(str)
-        :return: a tokenized list of strings; concatenating this list returns\
-        the original string if `preserve_case=False`
+        Args:
+            text: str
+
+        Returns: list(str)
+            A tokenized list of strings; concatenating this list returns the original string if `preserve_case=False`
        """
        # Fix HTML character entities:
        text = _replace_html_entities(text)
......
@@ -628,13 +628,16 @@ class DebertaTokenizer(PreTrainedTokenizer):
    def create_token_type_ids_from_sequences(self, token_ids_0, token_ids_1=None):
        """
-        Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
-        A BERT sequence pair mask has the following format:
-        0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1
-        | first sequence | second sequence
-        if token_ids_1 is None, only returns the first portion of the mask (0's).
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task.
+        A DeBERTa sequence pair mask has the following format:
+
+        ::
+
+            0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
+            | first sequence | second sequence |
+
+        If :obj:`token_ids_1` is :obj:`None`, this method only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (:obj:`List[int]`):
                List of IDs.
......
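A hedged sketch of the mask format documented above, using made-up token IDs and [CLS]/[SEP]-style special tokens (not the tokenizer's actual implementation):

```python
cls, sep = [1], [2]            # placeholder special-token ids
token_ids_0 = [10, 11, 12]     # first sequence (already converted to ids)
token_ids_1 = [20, 21]         # second sequence

# First sequence (with its special tokens) -> 0s, second sequence -> 1s.
token_type_ids = [0] * len(cls + token_ids_0 + sep) + [1] * len(token_ids_1 + sep)
print(token_type_ids)          # [0, 0, 0, 0, 0, 1, 1, 1]
```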