Unverified Commit 651408a0 authored by Arthur, committed by GitHub

[`Styling`] stylify using ruff (#27144)



* try to stylify using ruff

* might need to remove these changes?

* use ruff format and ruff check

* use isinstance instead of type comparison (see the sketch below)

* use # fmt: skip

* use # fmt: skip

* nits

* some styling changes

* update CI job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature extraction test

* remove `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leave the print for detected changes

* nits

* Remove file I/O
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* Nits

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more files

* Remove asynch

---------
Co-authored-by: charliermarsh <charlie.r.marsh@gmail.com>
parent acb5b4af
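One recurring change in the message above is swapping exact type comparisons for `isinstance` checks (the pycodestyle `E721` check, which ruff re-implements). A minimal sketch of the difference, using hypothetical stand-in classes rather than code from this diff:

```python
# Illustrative only: these classes are hypothetical stand-ins, not code from the diff.
class PretrainedConfig: ...
class BertConfig(PretrainedConfig): ...

config = BertConfig()

# The pattern the commit moves away from: exact type comparison (flagged by E721).
if type(config) == BertConfig:
    print("exact type match")

# The pattern the commit moves toward: isinstance, which also accepts subclasses.
if isinstance(config, PretrainedConfig):
    print("instance of PretrainedConfig (or a subclass)")
```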
......@@ -174,8 +174,7 @@ class AltCLIPOutput(ModelOutput):
text_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The text embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPTextModel`].
image_embeds(`torch.FloatTensor` of shape `(batch_size, output_dim`):
The image embeddings obtained by applying the projection layer to the pooled output of
[`AltCLIPVisionModel`].
The image embeddings obtained by applying the projection layer to the pooled output of [`AltCLIPVisionModel`].
text_model_output(`BaseModelOutputWithPooling`):
The output of the [`AltCLIPTextModel`].
vision_model_output(`BaseModelOutputWithPooling`):
......@@ -1049,9 +1048,7 @@ class AltCLIPPreTrainedModel(PreTrainedModel):
nn.init.normal_(module.out_proj.weight, std=out_proj_std)
elif isinstance(module, AltCLIPMLP):
factor = self.config.initializer_factor
in_proj_std = (
(module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
)
in_proj_std = (module.config.hidden_size**-0.5) * ((2 * module.config.num_hidden_layers) ** -0.5) * factor
fc_std = (2 * module.config.hidden_size) ** -0.5 * factor
nn.init.normal_(module.fc1.weight, std=fc_std)
nn.init.normal_(module.fc2.weight, std=in_proj_std)
......
......@@ -35,6 +35,7 @@ class AltCLIPProcessor(ProcessorMixin):
tokenizer ([`XLMRobertaTokenizerFast`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "tokenizer"]
image_processor_class = "CLIPImageProcessor"
tokenizer_class = ("XLMRobertaTokenizer", "XLMRobertaTokenizerFast")
......
......@@ -86,6 +86,7 @@ class ASTConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "audio-spectrogram-transformer"
def __init__(
......
......@@ -131,6 +131,7 @@ class AutoformerConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "autoformer"
attribute_map = {
"hidden_size": "d_model",
......
......@@ -46,6 +46,7 @@ class BarkProcessor(ProcessorMixin):
a list of `voice_preset_names`.
"""
tokenizer_class = "AutoTokenizer"
attributes = ["tokenizer"]
......
......@@ -107,6 +107,7 @@ class BartConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bart"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
......
......@@ -147,6 +147,7 @@ class BartTokenizerFast(PreTrainedTokenizerFast):
trim_offsets (`bool`, *optional*, defaults to `True`):
Whether the post processing step should trim offsets to avoid including whitespaces.
"""
vocab_files_names = VOCAB_FILES_NAMES
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
......
......@@ -115,6 +115,7 @@ class BeitConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "beit"
def __init__(
......
......@@ -136,6 +136,7 @@ class BertConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bert"
def __init__(
......
......@@ -84,6 +84,7 @@ class BertGenerationConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bert-generation"
def __init__(
......
......@@ -104,6 +104,7 @@ class BigBirdConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "big_bird"
def __init__(
......
......@@ -896,15 +896,11 @@ class BigBirdBlockSparseAttention(nn.Module):
# global keys (corresponding to 1st key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
:, :, :, :, :to_block_size
].view(
bsz, n_heads, -1, to_block_size
) # first_band_product
].view(bsz, n_heads, -1, to_block_size) # first_band_product
# global keys (corresponding to last key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
:, :, :, :, -to_block_size:
].view(
bsz, n_heads, -1, to_block_size
) # last_band_product
].view(bsz, n_heads, -1, to_block_size) # last_band_product
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
......
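Several commits above add `# fmt: skip` markers, while this hunk instead lets ruff format collapse the `.view(...)` call onto one line. For contrast, a minimal sketch of how a trailing `# fmt: skip` keeps a hand-wrapped statement untouched; the tensor shapes and names below are made up, not taken from the model:

```python
import torch

bsz, n_heads, seq_len, to_block_size = 2, 4, 16, 4
attn_weights = torch.randn(bsz, n_heads, seq_len, to_block_size)

# Without the trailing comment, ruff format would reflow this statement to fit
# the configured line length, as it did in the hunk above.
first_band_product = attn_weights.view(
    bsz, n_heads, -1, to_block_size
)  # fmt: skip
```

`# fmt: off` / `# fmt: on` pairs also exist for larger regions; the trailing comment form keeps the opt-out scoped to a single statement.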
......@@ -120,6 +120,7 @@ class BigBirdPegasusConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "bigbird_pegasus"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {
......
......@@ -683,15 +683,11 @@ class BigBirdPegasusBlockSparseAttention(nn.Module):
# global keys (corresponding to 1st key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, :to_block_size] = attn_weights[
:, :, :, :, :to_block_size
].view(
bsz, n_heads, -1, to_block_size
) # first_band_product
].view(bsz, n_heads, -1, to_block_size) # first_band_product
# global keys (corresponding to last key block)
attention_probs[:, :, 2 * from_block_size : -2 * from_block_size, -to_block_size:] = attn_weights[
:, :, :, :, -to_block_size:
].view(
bsz, n_heads, -1, to_block_size
) # last_band_product
].view(bsz, n_heads, -1, to_block_size) # last_band_product
# random keys
for p1, i1, w1 in zip(range(bsz), rand_attn, attn_weights):
# p1, i1, w1 corresponds to batch_dim i.e. following operation is done for each sequence in batch
......
......@@ -93,6 +93,7 @@ class BioGptConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "biogpt"
def __init__(
......
......@@ -85,6 +85,7 @@ class BitConfig(BackboneConfigMixin, PretrainedConfig):
>>> configuration = model.config
```
"""
model_type = "bit"
layer_types = ["preactivation", "bottleneck"]
supported_padding = ["SAME", "VALID"]
......
......@@ -104,6 +104,7 @@ class BlenderbotConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "blenderbot"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
......
......@@ -1511,9 +1511,7 @@ class BlenderbotForCausalLM(BlenderbotPreTrainedModel):
>>> from transformers import AutoTokenizer, BlenderbotForCausalLM
>>> tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
>>> model = BlenderbotForCausalLM.from_pretrained(
... "facebook/blenderbot-400M-distill", add_cross_attention=False
... )
>>> model = BlenderbotForCausalLM.from_pretrained("facebook/blenderbot-400M-distill", add_cross_attention=False)
>>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder."
>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)
......
......@@ -376,8 +376,8 @@ class BlenderbotTokenizer(PreTrainedTokenizer):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
not make use of token type ids, therefore a list of zeros is returned.
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):
......
......@@ -212,8 +212,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
`str`: Mask token, to use when training a model with masked-language modeling. Log an error if used while not
having been set.
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will
greedily comprise the space before the *<mask>*.
Blenderbot tokenizer has a special mask token to be usable in the fill-mask pipeline. The mask token will greedily
comprise the space before the *<mask>*.
"""
if self._mask_token is None:
if self.verbose:
......@@ -264,8 +264,8 @@ class BlenderbotTokenizerFast(PreTrainedTokenizerFast):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does
not make use of token type ids, therefore a list of zeros is returned.
Create a mask from the two sequences passed to be used in a sequence-pair classification task. Blenderbot does not
make use of token type ids, therefore a list of zeros is returned.
Args:
token_ids_0 (`List[int]`):
......