Unverified Commit 651408a0 authored by Arthur, committed by GitHub

[`Styling`] stylify using ruff (#27144)



* try to stylify using ruff

* might need to remove these changes?

* use ruff format and ruff check

* use isinstance instead of type comparison (see the sketch after the commit message)

* use # fmt: skip (illustrated after the commit message)

* use # fmt: skip

* nits

* some styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature extraction test

* remove `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leave the print for detected changes

* nits

* Remove file I/O
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* Nits

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more files

* Remove asynch

---------
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
parent acb5b4af
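One of the bullets above swaps direct type comparisons for `isinstance` checks, which is what ruff's E721 rule asks for. A minimal sketch of the pattern (the `normalize` helper is hypothetical, not code from this diff):

```python
from typing import Union


def normalize(value: Union[str, list]) -> list:
    # ruff flags `type(value) == str` (E721); the PR rewrites such checks as
    # isinstance calls, which also cover subclasses of str.
    if isinstance(value, str):
        return [value]
    return list(value)


print(normalize("hello"))     # ['hello']
print(normalize(("a", "b")))  # ['a', 'b']
```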
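Several bullets add `# fmt: skip` so that hand-formatted literals (expected test outputs, lookup tables, and the like) survive `ruff format`. An illustrative sketch, not taken from the diff; the mapping below is made up:

```python
# A hand-aligned, table-like literal. Without the trailing pragma, `ruff format`
# (like black) would normalize the extra alignment spaces; `# fmt: skip` on the
# statement's last line tells the formatter to leave the statement as written.
LABEL_TO_ID = {
    "negative": 0,
    "neutral":  1,
    "positive": 2,
}  # fmt: skip

print(LABEL_TO_ID["positive"])  # prints 2
```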
......@@ -114,6 +114,7 @@ class LayoutLMv2Config(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "layoutlmv2"
def __init__(
......
......@@ -43,6 +43,7 @@ class LayoutLMv2Processor(ProcessorMixin):
tokenizer (`LayoutLMv2Tokenizer` or `LayoutLMv2TokenizerFast`, *optional*):
An instance of [`LayoutLMv2Tokenizer`] or [`LayoutLMv2TokenizerFast`]. The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "LayoutLMv2ImageProcessor"
tokenizer_class = ("LayoutLMv2Tokenizer", "LayoutLMv2TokenizerFast")
......
......@@ -119,6 +119,7 @@ class LayoutLMv3Config(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "layoutlmv3"
def __init__(
......
......@@ -904,8 +904,9 @@ class LayoutLMv3Model(LayoutLMv3PreTrainedModel):
         final_bbox = final_position_ids = None
         patch_height = patch_width = None
         if pixel_values is not None:
-            patch_height, patch_width = int(pixel_values.shape[2] / self.config.patch_size), int(
-                pixel_values.shape[3] / self.config.patch_size
+            patch_height, patch_width = (
+                int(pixel_values.shape[2] / self.config.patch_size),
+                int(pixel_values.shape[3] / self.config.patch_size),
             )
             visual_embeddings = self.forward_image(pixel_values)
             visual_attention_mask = torch.ones(
......
......@@ -43,6 +43,7 @@ class LayoutLMv3Processor(ProcessorMixin):
tokenizer (`LayoutLMv3Tokenizer` or `LayoutLMv3TokenizerFast`, *optional*):
An instance of [`LayoutLMv3Tokenizer`] or [`LayoutLMv3TokenizerFast`]. The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "LayoutLMv3ImageProcessor"
tokenizer_class = ("LayoutLMv3Tokenizer", "LayoutLMv3TokenizerFast")
......
......@@ -268,6 +268,7 @@ class LayoutLMv3Tokenizer(PreTrainedTokenizer):
only_label_first_subword (`bool`, *optional*, defaults to `True`):
Whether or not to only label the first subword, in case word labels are provided.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
......
......@@ -97,6 +97,7 @@ class LEDConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "led"
attribute_map = {
"num_attention_heads": "encoder_attention_heads",
......
......@@ -87,6 +87,7 @@ class LevitConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "levit"
def __init__(
......
......@@ -90,6 +90,7 @@ class LiltConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "lilt"
def __init__(
......
......@@ -110,6 +110,7 @@ class LlamaConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "llama"
keys_to_ignore_at_inference = ["past_key_values"]
......
......@@ -104,6 +104,7 @@ class LongformerConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "longformer"
def __init__(
......
......@@ -416,8 +416,8 @@ class LongformerTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does
-        not make use of token type ids, therefore a list of zeros is returned.
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does not
+        make use of token type ids, therefore a list of zeros is returned.
 
         Args:
             token_ids_0 (`List[int]`):
......
......@@ -254,8 +254,8 @@ class LongformerTokenizerFast(PreTrainedTokenizerFast):
         `str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
         having been set.
 
-        Longformer tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
-        greedily comprise the space before the *<mask>*.
+        Longformer tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
+        comprise the space before the *<mask>*.
         """
         if self._mask_token is None:
             if self.verbose:
......@@ -309,8 +309,8 @@ class LongformerTokenizerFast(PreTrainedTokenizerFast):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does
-        not make use of token type ids, therefore a list of zeros is returned.
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. Longformer does not
+        make use of token type ids, therefore a list of zeros is returned.
 
         Args:
             token_ids_0 (`List[int]`):
......
......@@ -82,6 +82,7 @@ class LongT5Config(PretrainedConfig):
use_cache (`bool`, *optional*, defaults to `True`):
Whether or not the model should return the last key/values attentions (not used by all models).
"""
model_type = "longt5"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"hidden_size": "d_model", "num_attention_heads": "num_heads", "num_hidden_layers": "num_layers"}
......
......@@ -97,6 +97,7 @@ class LukeConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "luke"
def __init__(
......
......@@ -254,8 +254,8 @@ class LxmertTokenizer(PreTrainedTokenizer):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert
-        sequence pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert sequence
+        pair mask has the following format:
 
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
......@@ -161,8 +161,8 @@ class LxmertTokenizerFast(PreTrainedTokenizerFast):
         self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
-        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert
-        sequence pair mask has the following format:
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task. A Lxmert sequence
+        pair mask has the following format:
 
         ```
         0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
......@@ -99,6 +99,7 @@ class M2M100Config(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "m2m_100"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
......
......@@ -102,6 +102,7 @@ class MarianConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "marian"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
......
......@@ -97,6 +97,7 @@ class MarkupLMConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "markuplm"
def __init__(
......