Unverified Commit bcb55d33 authored by Sylvain Gugger, committed by GitHub

Upgrade styler to better handle lists (#9423)

* Add missing lines before a new list.

* Update doc styler and restyle some files.

* Fix docstrings of LED and Longformer
parent b7e54897
@@ -40,7 +40,6 @@ Tips:
   *Longformer*'s *chunked self-attention* layer. :class:`~transformers.LEDTokenizer` is an alias of
   :class:`~transformers.BartTokenizer`.
 - LED works very well on long-range *sequence-to-sequence* tasks where the ``input_ids`` largely exceed a length of
   1024 tokens.
 - LED pads the ``input_ids`` to be a multiple of ``config.attention_window`` if required. Therefore a small speed-up is
   gained, when :class:`~transformers.LEDTokenizer` is used with the ``pad_to_multiple_of`` argument.
...
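As a usage sketch of the ``pad_to_multiple_of`` tip above (the checkpoint name and the window size are assumptions for illustration, not taken from this diff):

```python
from transformers import LEDTokenizer

# Padding the inputs to a multiple of the attention window up front means the
# model does not need to re-pad them internally.
tokenizer = LEDTokenizer.from_pretrained("allenai/led-base-16384")  # assumed checkpoint
inputs = tokenizer(
    ["A very long article to summarize ..."],
    padding=True,
    pad_to_multiple_of=1024,  # e.g. config.attention_window
    return_tensors="pt",
)
```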
@@ -443,16 +443,22 @@ TF_DPR_START_DOCSTRING = r"""
     .. note::

-        TF 2.0 models accepts two formats as inputs: - having all inputs as keyword arguments (like PyTorch models), or
-        - having all inputs as a list, tuple or dict in the first positional arguments. This second option is useful
-        when using :meth:`tf.keras.Model.fit` method which currently requires having all the tensors in the first
-        argument of the model call function: :obj:`model(inputs)`. If you choose this second option, there are three
-        possibilities you can use to gather all the input Tensors in the first positional argument : - a single Tensor
-        with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)` - a list of varying length with one or
-        several input Tensors IN THE ORDER given in the docstring: :obj:`model([input_ids, attention_mask])` or
-        :obj:`model([input_ids, attention_mask, token_type_ids])` - a dictionary with one or several input Tensors
-        associated to the input names given in the docstring: :obj:`model({"input_ids": input_ids, "token_type_ids":
-        token_type_ids})`
+        TF 2.0 models accepts two formats as inputs:
+
+        - having all inputs as keyword arguments (like PyTorch models), or
+        - having all inputs as a list, tuple or dict in the first positional arguments.
+
+        This second option is useful when using :meth:`tf.keras.Model.fit` method which currently requires having all
+        the tensors in the first argument of the model call function: :obj:`model(inputs)`.
+
+        If you choose this second option, there are three possibilities you can use to gather all the input Tensors in
+        the first positional argument :
+
+        - a single Tensor with :obj:`input_ids` only and nothing else: :obj:`model(inputs_ids)`
+        - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
+          :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])`
+        - a dictionary with one or several input Tensors associated to the input names given in the docstring:
+          :obj:`model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

     Parameters:
         config (:class:`~transformers.DPRConfig`): Model configuration class with all the parameters of the model.
...
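For illustration, a minimal sketch of the three input formats described in the note above (the checkpoint name and the `from_pt` conversion are assumptions; any TF 2.0 model in the library accepts the same formats):

```python
from transformers import DPRQuestionEncoderTokenizer, TFDPRQuestionEncoder

tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
model = TFDPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base", from_pt=True)
inputs = tokenizer("Is this a question?", return_tensors="tf")

# 1. All inputs as keyword arguments (like PyTorch models).
out_kwargs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])

# 2. A list/tuple in the first positional argument, in the order given in the docstring.
out_list = model([inputs["input_ids"], inputs["attention_mask"]])

# 3. A dictionary mapping input names to tensors in the first positional argument.
out_dict = model({"input_ids": inputs["input_ids"], "attention_mask": inputs["attention_mask"]})
```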
@@ -638,8 +638,8 @@ LAYOUTLM_INPUTS_DOCSTRING = r"""
             `What are input IDs? <../glossary.html#input-ids>`__
         bbox (:obj:`torch.LongTensor` of shape :obj:`({0}, 4)`, `optional`):
-            Bounding Boxes of each input sequence tokens. Selected in the range ``[0, config.max_2d_position_embeddings
-            - 1]``.
+            Bounding Boxes of each input sequence tokens. Selected in the range ``[0,
+            config.max_2d_position_embeddings-1]``.
         attention_mask (:obj:`torch.FloatTensor` of shape :obj:`({0})`, `optional`):
             Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``: ``1`` for
             tokens that are NOT MASKED, ``0`` for MASKED tokens.
...
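A hedged sketch of what the ``bbox`` input described above looks like in practice; the coordinate values are made up, and LayoutLM conventionally expects boxes normalized to a 0-1000 scale, which stays inside ``[0, config.max_2d_position_embeddings - 1]`` for the default configuration:

```python
import torch

# One batch of 3 tokens, each with an (x0, y0, x1, y1) bounding box.
bbox = torch.tensor(
    [[[63, 92, 180, 110],    # box for the first token
      [190, 92, 251, 110],   # box for the second token
      [0, 0, 0, 0]]]         # a padding token gets an all-zero box
)  # shape: (batch_size, sequence_length, 4)
```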
@@ -172,11 +172,11 @@ class LEDEncoderSelfAttention(nn.Module):
         :class:`LEDEncoderSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
         `attention_window` happens in :meth:`LEDEncoderModel.forward` to avoid redoing the padding on each layer.
-        The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
-        0: local attention
-        +ve: global attention
+        The `attention_mask` is changed in :meth:`LEDEncoderModel.forward` from 0, 1, 2 to:
+
+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
         """
         hidden_states = hidden_states.transpose(0, 1)
...
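To make the mapping in the corrected docstring concrete, here is an illustrative PyTorch sketch, not the layer's actual implementation, of how a merged mask with values 0/1/2 translates into the additive values the self-attention works with:

```python
import torch

# Hypothetical merged attention mask: 0 = padding, 1 = local attention, 2 = global attention.
mask = torch.tensor([[2, 1, 1, 1, 0, 0]])

extended = torch.zeros(mask.shape, dtype=torch.float)
extended = extended.masked_fill(mask == 0, -10000.0)  # no attention on padding tokens
extended = extended.masked_fill(mask == 2, 10000.0)   # global attention tokens
# extended -> tensor([[ 10000., 0., 0., 0., -10000., -10000.]])
```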
@@ -190,11 +190,11 @@ class TFLEDEncoderSelfAttention(tf.keras.layers.Layer):
         LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
         `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
-        The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
-        0: local attention
-        +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
+
+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
         """
         # retrieve input args
         (
...
@@ -561,11 +561,11 @@ class LongformerSelfAttention(nn.Module):
         :class:`LongformerSelfAttention` expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
         `attention_window` happens in :meth:`LongformerModel.forward` to avoid redoing the padding on each layer.
-        The `attention_mask` is changed in :meth:`BertModel.forward` from 0, 1, 2 to -ve: no attention
-        0: local attention
-        +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
+
+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
         """
         hidden_states = hidden_states.transpose(0, 1)
...
@@ -768,11 +768,11 @@ class TFLongformerSelfAttention(tf.keras.layers.Layer):
         LongformerSelfAttention expects `len(hidden_states)` to be multiple of `attention_window`. Padding to
         `attention_window` happens in LongformerModel.forward to avoid redoing the padding on each layer.
-        The `attention_mask` is changed in `BertModel.forward` from 0, 1, 2 to -ve: no attention
-        0: local attention
-        +ve: global attention
+        The `attention_mask` is changed in :meth:`LongformerModel.forward` from 0, 1, 2 to:
+
+            * -10000: no attention
+            * 0: local attention
+            * +10000: global attention
         """
         # retrieve input args
         (
...
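The same mapping, sketched in TensorFlow for the TF variants above (illustrative only, not the layers' actual code):

```python
import tensorflow as tf

# Hypothetical merged attention mask: 0 = padding, 1 = local attention, 2 = global attention.
mask = tf.constant([[2, 1, 1, 1, 0, 0]])

extended = tf.where(mask == 0, -10000.0, tf.where(mask == 2, 10000.0, 0.0))
# extended -> [[ 10000., 0., 0., 0., -10000., -10000.]]
```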
@@ -522,6 +522,7 @@ class CaptureLogger:
     Context manager to capture `logging` streams
     Args:
     - logger: 'logging` logger object
     Results:
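A brief usage sketch of this context manager; the logger name and the `cl.out` attribute holding the captured text are assumptions for illustration:

```python
from transformers.utils import logging
from transformers.testing_utils import CaptureLogger

logger = logging.get_logger("transformers.models.bart.tokenization_bart")  # assumed logger name

with CaptureLogger(logger) as cl:
    logger.info("something happened")

# The captured stream is assumed to be exposed as `cl.out`.
assert "something happened" in cl.out
```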
@@ -851,9 +852,10 @@ def pytest_terminal_summary_main(tr, id):
     there.
     Args:
     - tr: `terminalreporter` passed from `conftest.py`
-    - id: unique id like `tests` or `examples` that will be incorporated into the final reports
-      filenames - this is needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.
+    - id: unique id like `tests` or `examples` that will be incorporated into the final reports filenames - this is
+      needed as some jobs have multiple runs of pytest, so we can't have them overwrite each other.
     NB: this functions taps into a private _pytest API and while unlikely, it could break should
     pytest do internal changes - also it calls default internal methods of terminalreporter which
...
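For context, a sketch of how this helper is typically invoked from a `conftest.py`; the `--make-reports` option name mirrors the transformers test setup but should be treated as an assumption here:

```python
# conftest.py (sketch)
def pytest_addoption(parser):
    parser.addoption("--make-reports", action="store", default=False, help="generate report files for this test run")


def pytest_terminal_summary(terminalreporter):
    from transformers.testing_utils import pytest_terminal_summary_main

    make_reports = terminalreporter.config.getoption("--make-reports")
    if make_reports:
        pytest_terminal_summary_main(terminalreporter, id=make_reports)
```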
@@ -191,6 +191,7 @@ def speed_metrics(split, start_time, num_samples=None):
     should be run immediately after the operation to be measured has completed.
     Args:
     - split: name to prefix metric (like train, eval, test...)
     - start_time: operation start time
     - num_samples: number of samples processed
...
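A small usage sketch of this helper; the import path and the exact metric key names are assumptions based on the `split` prefix described above:

```python
import time

from transformers.trainer_utils import speed_metrics  # assumed location

start_time = time.time()
# ... run an evaluation loop over the dataset here ...
metrics = speed_metrics("eval", start_time, num_samples=1000)
# Expected to look roughly like {"eval_runtime": 12.34, "eval_samples_per_second": 81.03}
```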
@@ -42,7 +42,7 @@ DOC_SPECIAL_WORD = [
 # Matches any declaration of textual block, like `.. note::`. (ignore case to avoid writing all versions in the list)
 _re_textual_blocks = re.compile(r"^\s*\.\.\s+(" + "|".join(TEXTUAL_BLOCKS) + r")\s*::\s*$", re.IGNORECASE)
 # Matches list introduction in rst.
-_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+.\s+)")
+_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+\.\s+)")
 # Matches the indent in a line.
 _re_indent = re.compile(r"^(\s*)\S")
 # Matches a table declaration in rst.
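The escaped dot is the point of this change: with the old `\d+.` pattern, a digit followed by any character could pass as a numbered-list marker. A quick sketch of the difference (the example strings are made up):

```python
import re

old_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+.\s+)")   # unescaped dot
new_re_list = re.compile(r"^(\s*-\s+|\s*\*\s+|\s*\d+\.\s+)")  # literal dot required

line = "40 tokens are enough"
print(bool(old_re_list.search(line)))  # True  -- plain prose mistaken for a numbered list item
print(bool(new_re_list.search(line)))  # False

print(bool(new_re_list.search("1. first item")))  # True, real numbered lists still match
```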
@@ -355,10 +355,34 @@ rst_styler = CodeStyler()
 doc_styler = DocstringStyler()

+def _add_new_lines_before_list(text):
+    """Add a new empty line before a list begins."""
+    lines = text.split("\n")
+    new_lines = []
+    in_list = False
+    for idx, line in enumerate(lines):
+        # Detect if the line is the start of a new list.
+        if _re_list.search(line) is not None and not in_list:
+            current_indent = get_indent(line)
+            in_list = True
+            # If the line before is non empty, add an extra new line.
+            if idx > 0 and len(lines[idx - 1]) != 0:
+                new_lines.append("")
+        # Detect if we're out of the current list.
+        if in_list and not line.startswith(current_indent) and _re_list.search(line) is None:
+            in_list = False
+        new_lines.append(line)
+    return "\n".join(new_lines)

 def style_rst_file(doc_file, max_len=119, check_only=False):
     """ Style one rst file `doc_file` to `max_len`."""
     with open(doc_file, "r", encoding="utf-8", newline="\n") as f:
         doc = f.read()
+    # Add missing new lines before lists
+    doc = _add_new_lines_before_list(doc)
+    # Style
     clean_doc = rst_styler.style(doc, max_len=max_len)
     diff = clean_doc != doc
@@ -391,6 +415,8 @@ def style_docstring(docstring, max_len=119):
     # Add missing new lines before Args/Returns etc.
     docstring = _re_any_doc_special_word.sub(r"\n\n\1\2\3\n", docstring)
+    # Add missing new lines before lists
+    docstring = _add_new_lines_before_list(docstring)
     # Style
     styled_doc = doc_styler.style(docstring, max_len=max_len, min_indent=indent)
...
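To see what the new helper does, a quick input/output sketch (the sample text is made up):

```python
text = "Tips:\n- the first tip\n- the second tip"
print(_add_new_lines_before_list(text))
# Tips:
#
# - the first tip
# - the second tip
```

A blank line is inserted only before the first item of a list, which is exactly what the rst parser needs to recognize the block as a list.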