Unverified commit 651408a0, authored by Arthur, committed by GitHub

[`Styling`] stylify using ruff (#27144)



* try to stylify using ruff

* might need to remove these changes?

* use ruff format and ruff check

* use isinstance instead of type comparison (see the sketch after this message)

* use # fmt: skip (see the sketch after this message)

* use # fmt: skip

* nits

* some styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature extraction test

* remove `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leave the print for detected changes

* nits

* Remove file I/O
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* Nits

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more files

* Remove asynch

---------
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
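
For context: `ruff check` is the linter and `ruff format` the formatter named in the bullets above. The isinstance bullet corresponds to ruff's E721 rule, which flags direct type comparisons. A minimal before/after sketch; the `Config`/`ChildConfig` classes are hypothetical stand-ins, not code from this PR:

```python
class Config:
    pass


class ChildConfig(Config):
    pass


cfg = ChildConfig()

# Flagged by `ruff check` (E721): a direct type comparison ignores subclassing
print(type(cfg) == Config)  # False

# The form this PR switches to: isinstance also accepts subclasses
print(isinstance(cfg, Config))  # True
```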
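The repeated `# fmt: skip` bullets use ruff format's suppression comment, which opts a single statement out of formatting so manual alignment survives; `# fmt: off` / `# fmt: on` do the same for a whole region. A small sketch with illustrative values, not taken from the PR (the `indent-width = 4` setting mentioned above could be dropped because 4 is already ruff's default):

```python
# The trailing comment stops ruff format from normalizing the extra spaces
# that keep these coordinates visually aligned.
BOX = [637,  773,  693,  782]  # fmt: skip

# For a multi-line region, bracket it with # fmt: off / # fmt: on instead.
# fmt: off
ID_TO_LABEL = {0: "O",
               1: "B-HEADER",
               2: "I-HEADER"}
# fmt: on
```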
parent acb5b4af
@@ -114,6 +114,7 @@ class LayoutLMv2Config(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "layoutlmv2"
 
     def __init__(

@@ -43,6 +43,7 @@ class LayoutLMv2Processor(ProcessorMixin):
         tokenizer (`LayoutLMv2Tokenizer` or `LayoutLMv2TokenizerFast`, *optional*):
             An instance of [`LayoutLMv2Tokenizer`] or [`LayoutLMv2TokenizerFast`]. The tokenizer is a required input.
     """
+
     attributes = ["image_processor", "tokenizer"]
     image_processor_class = "LayoutLMv2ImageProcessor"
     tokenizer_class = ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast")

@@ -119,6 +119,7 @@ class LayoutLMv3Config(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "layoutlmv3"
 
     def __init__(

@@ -904,8 +904,9 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
         final_bbox = final_position_ids = None
         patch_height = patch_width = None
         if pixel_values is not None:
-            patch_height, patch_width = int(pixel_values.shape[2] / self.config.patch_size), int(
-                pixel_values.shape[3] / self.config.patch_size
-            )
+            patch_height, patch_width = (
+                int(pixel_values.shape[2] / self.config.patch_size),
+                int(pixel_values.shape[3] / self.config.patch_size),
+            )
             visual_embeddings = self.forward_image(pixel_values)
             visual_attention_mask = torch.ones(

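The modeling hunk above is the only multi-line rewrite in this excerpt, and it is purely cosmetic: ruff format regroups the tuple unpacking inside one pair of parentheses. A self-contained sketch showing the two forms are equivalent (a plain tuple stands in for the torch tensor's shape):

```python
# Stand-in for pixel_values.shape on a (1, 3, 224, 224) image batch
shape = (1, 3, 224, 224)
patch_size = 16

# Pre-ruff wrapping: the second int() call spills across lines
patch_height, patch_width = int(shape[2] / patch_size), int(
    shape[3] / patch_size
)

# ruff format's version parenthesizes the whole tuple; behavior is identical
patch_height2, patch_width2 = (
    int(shape[2] / patch_size),
    int(shape[3] / patch_size),
)

assert (patch_height, patch_width) == (patch_height2, patch_width2) == (14, 14)
```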
@@ -43,6 +43,7 @@ class LayoutLMv3Processor(ProcessorMixin):
         tokenizer (`LayoutLMv3Tokenizer` or `LayoutLMv3TokenizerFast`, *optional*):
             An instance of [`LayoutLMv3Tokenizer`] or [`LayoutLMv3TokenizerFast`]. The tokenizer is a required input.
     """
+
     attributes = ["image_processor", "tokenizer"]
     image_processor_class = "LayoutLMv3ImageProcessor"
     tokenizer_class = ("LayoutLMv3Tokenizer", "LayoutLMv3TokenizerFast")

@@ -268,6 +268,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
         only_label_first_subword (`bool`, *optional*, defaults to `True`):
             Whether or not to only label the first subword, in case word labels are provided.
     """
+
     vocab_files_names = VOCAB_FILES_NAMES
     pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
     max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

@@ -97,6 +97,7 @@ class LEDConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "led"
     attribute_map = {
         "num_attention_heads": "encoder_attention_heads",

@@ -87,6 +87,7 @@ class LevitConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "levit"
 
     def __init__(

@@ -90,6 +90,7 @@ class LiltConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "lilt"
 
     def __init__(

@@ -110,6 +110,7 @@ class LlamaConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "llama"
     keys_to_ignore_at_inference = ["past_key_values"]
 
@@ -104,6 +104,7 @@ class LongformerConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "longformer"
 
     def __init__(

@@ -416,8 +416,8 @@ class LongformerTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does
-        not make use of token type ids, therefore a list of zeros is returned.
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does not
+        make use of token type ids, therefore a list of zeros is returned.
 
         Args:
             token_ids_0 (`List[int]`):

@@ -254,8 +254,8 @@ class LongformerTokenizerFast(PreTrainedTokenizerFast):
         `str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
         having been set.
 
-        Longformer tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
-        greedily comprise the space before the *<mask>*.
+        Longformer tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
+        comprise the space before the *<mask>*.
         """
         if self._mask_token is None:
             if self.verbose:

@@ -309,8 +309,8 @@ class LongformerTokenizerFast(PreTrainedTokenizerFast):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does
-        not make use of token type ids, therefore a list of zeros is returned.
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does not
+        make use of token type ids, therefore a list of zeros is returned.
 
         Args:
             token_ids_0 (`List[int]`):

@@ -82,6 +82,7 @@ class LongT5Config(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models).
     """
+
     model_type = "longt5"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"hidden_size": "d_model", "num_attention_heads": "num_heads", "num_hidden_layers": "num_layers"}

@@ -97,6 +97,7 @@ class LukeConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "luke"
 
     def __init__(

@@ -254,8 +254,8 @@ class LxmertTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert
-        sequence pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert sequence
+        pair mask has the following format:
 
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1

@@ -161,8 +161,8 @@ class LxmertTokenizerFast(PreTrainedTokenizerFast):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert
-        sequence pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert sequence
+        pair mask has the following format:
 
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1

@@ -99,6 +99,7 @@ class M2M100Config(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "m2m_100"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}

@@ -102,6 +102,7 @@ class MarianConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "marian"
     keys_to_ignore_at_inference = ["past_key_values"]
     attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}

@@ -97,6 +97,7 @@ class MarkupLMConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "markuplm"
 
    def __init__(