Unverified Commit 651408a0 authored by Arthur's avatar Arthur Committed by GitHub
Browse files

[`Styling`] stylify using ruff (#27144)



* try to stylify using ruff

* might need to remove these changes?

* use ruf format andruff check

* use isinstance instead of type comparision

* use # fmt: skip

* use # fmt: skip

* nits

* soem styling changes

* update ci job

* nits isinstance

* more files update

* nits

* more nits

* small nits

* check and format

* revert wrong changes

* actually use formatter instead of checker

* nits

* well docbuilder is overwriting this commit

* revert notebook changes

* try to nuke docbuilder

* style

* fix feature exrtaction test

* remve `indent-width = 4`

* fixup

* more nits

* update the ruff version that we use

* style

* nuke docbuilder styling

* leve the print for detected changes

* nits

* Remove file I/O
Co-authored-by: default avatarcharliermarsh <charlie.r.marsh@gmail.com>

* style

* nits

* revert notebook changes

* Add # fmt skip when possible

* Add # fmt skip when possible

* Fix

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* More `  # fmt: skip` usage

* NIts

* more fixes

* fix tapas

* Another way to skip

* Recommended way

* Fix two more fiels

* Remove asynch
Remove asynch

---------
Co-authored-by: default avatarcharliermarsh <charlie.r.marsh@gmail.com>
parent acb5b4af
......@@ -67,7 +67,7 @@ class MarkupLMFeatureExtractor(FeatureExtractionMixin):
string2xsubs_seq = []
for element in html_code.descendants:
if type(element) == bs4.element.NavigableString:
if isinstance(element, bs4.element.NavigableString):
if type(element.parent) != bs4.element.Tag:
continue
......
......@@ -41,6 +41,7 @@ class MarkupLMProcessor(ProcessorMixin):
parse_html (`bool`, *optional*, defaults to `True`):
Whether or not to use `MarkupLMFeatureExtractor` to parse HTML strings into nodes and corresponding xpaths.
"""
feature_extractor_class = "MarkupLMFeatureExtractor"
tokenizer_class = ("MarkupLMTokenizer", "MarkupLMTokenizerFast")
parse_html = True
......
......@@ -119,6 +119,7 @@ class Mask2FormerConfig(PretrainedConfig):
```
"""
model_type = "mask2former"
backbones_supported = ["swin"]
attribute_map = {"hidden_size": "hidden_dim"}
......
......@@ -2537,5 +2537,5 @@ class Mask2FormerForUniversalSegmentation(Mask2FormerPreTrainedModel):
if not return_dict:
output = tuple(v for v in output.values() if v is not None)
if loss is not None:
output = ((loss)) + output
output = (loss) + output
return output
......@@ -94,6 +94,7 @@ class MaskFormerConfig(PretrainedConfig):
```
"""
model_type = "maskformer"
attribute_map = {"hidden_size": "mask_feature_size"}
backbones_supported = ["resnet", "swin"]
......
......@@ -90,6 +90,7 @@ class MaskFormerSwinConfig(BackboneConfigMixin, PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "maskformer-swin"
attribute_map = {
......
......@@ -104,6 +104,7 @@ class MBartConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "mbart"
keys_to_ignore_at_inference = ["past_key_values"]
attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"}
......
......@@ -45,9 +45,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"facebook/mbart-large-cc25": 1024,
}
# fmt: off
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN"]
# fmt: on
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN"] # fmt: skip
class MBartTokenizer(PreTrainedTokenizer):
......
......@@ -55,9 +55,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"facebook/mbart-large-cc25": 1024,
}
# fmt: off
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN"]
# fmt: on
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN"] # fmt: skip
class MBartTokenizerFast(PreTrainedTokenizerFast):
......
......@@ -41,9 +41,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"facebook/mbart-large-50-one-to-many-mmt": 1024,
}
# fmt: off
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN", "af_ZA", "az_AZ", "bn_IN", "fa_IR", "he_IL", "hr_HR", "id_ID", "ka_GE", "km_KH", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "pl_PL", "ps_AF", "pt_XX", "sv_SE", "sw_KE", "ta_IN", "te_IN", "th_TH", "tl_XX", "uk_UA", "ur_PK", "xh_ZA", "gl_ES", "sl_SI"]
# fmt: on
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN", "af_ZA", "az_AZ", "bn_IN", "fa_IR", "he_IL", "hr_HR", "id_ID", "ka_GE", "km_KH", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "pl_PL", "ps_AF", "pt_XX", "sv_SE", "sw_KE", "ta_IN", "te_IN", "th_TH", "tl_XX", "uk_UA", "ur_PK", "xh_ZA", "gl_ES", "sl_SI"] # fmt: skip
class MBart50Tokenizer(PreTrainedTokenizer):
......
......@@ -51,9 +51,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"facebook/mbart-large-50-one-to-many-mmt": 1024,
}
# fmt: off
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN", "af_ZA", "az_AZ", "bn_IN", "fa_IR", "he_IL", "hr_HR", "id_ID", "ka_GE", "km_KH", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "pl_PL", "ps_AF", "pt_XX", "sv_SE", "sw_KE", "ta_IN", "te_IN", "th_TH", "tl_XX", "uk_UA", "ur_PK", "xh_ZA", "gl_ES", "sl_SI"]
# fmt: on
FAIRSEQ_LANGUAGE_CODES = ["ar_AR", "cs_CZ", "de_DE", "en_XX", "es_XX", "et_EE", "fi_FI", "fr_XX", "gu_IN", "hi_IN", "it_IT", "ja_XX", "kk_KZ", "ko_KR", "lt_LT", "lv_LV", "my_MM", "ne_NP", "nl_XX", "ro_RO", "ru_RU", "si_LK", "tr_TR", "vi_VN", "zh_CN", "af_ZA", "az_AZ", "bn_IN", "fa_IR", "he_IL", "hr_HR", "id_ID", "ka_GE", "km_KH", "mk_MK", "ml_IN", "mn_MN", "mr_IN", "pl_PL", "ps_AF", "pt_XX", "sv_SE", "sw_KE", "ta_IN", "te_IN", "th_TH", "tl_XX", "uk_UA", "ur_PK", "xh_ZA", "gl_ES", "sl_SI"] # fmt: skip
class MBart50TokenizerFast(PreTrainedTokenizerFast):
......
......@@ -145,6 +145,7 @@ class MegaConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "mega"
def __init__(
......
......@@ -539,9 +539,7 @@ class MegaGatedCrossAttention(nn.Module):
self.config = config
self.activation = ACT2FN[self.config.activation]
self.attention_activation = self.config.attention_activation
self.scaling = (
self.config.shared_representation_size**-0.5 if self.attention_activation == "softmax" else None
)
self.scaling = self.config.shared_representation_size**-0.5 if self.attention_activation == "softmax" else None
self.dropout = MegaDropout(self.config.dropout_prob, is_featurewise=self.config.use_feature_dropout)
self.hidden_dropout = MegaDropout(
......
......@@ -90,6 +90,7 @@ class MegatronBertConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "megatron-bert"
def __init__(
......
......@@ -89,6 +89,7 @@ class MgpstrConfig(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "mgp-str"
def __init__(
......
......@@ -49,6 +49,7 @@ class MgpstrProcessor(ProcessorMixin):
tokenizer ([`MgpstrTokenizer`], *optional*):
The tokenizer is a required input.
"""
attributes = ["image_processor", "char_tokenizer"]
image_processor_class = "ViTImageProcessor"
char_tokenizer_class = "MgpstrTokenizer"
......
......@@ -108,6 +108,7 @@ class MobileBertConfig(PretrainedConfig):
Attributes: pretrained_config_archive_map (Dict[str, str]): A dictionary containing all the available pre-trained
checkpoints.
"""
pretrained_config_archive_map = MOBILEBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "mobilebert"
......
......@@ -252,8 +252,8 @@ class MobileBertTokenizer(PreTrainedTokenizer):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT
sequence pair mask has the following format:
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT sequence
pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
......@@ -159,8 +159,8 @@ class MobileBertTokenizerFast(PreTrainedTokenizerFast):
self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
) -> List[int]:
"""
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT
sequence pair mask has the following format:
Create a mask from the two sequences passed to be used in a sequence-pair classification task. A MobileBERT sequence
pair mask has the following format:
```
0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
......
......@@ -78,6 +78,7 @@ class MobileNetV1Config(PretrainedConfig):
>>> # Accessing the model configuration
>>> configuration = model.config
```"""
model_type = "mobilenet_v1"
def __init__(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment