Unverified Commit e13f72fb authored by Stas Bekman's avatar Stas Bekman Committed by GitHub
Browse files

[doc] :obj: hunt (#14954)

* redo sans examples

* style
parent 133c5e40
......@@ -260,7 +260,7 @@ class VisionEncoderDecoderModel(PreTrainedModel):
the model, you need to first set it back in training mode with `model.train()`.
Params:
encoder_pretrained_model_name_or_path (:obj: *str*, *optional*):
encoder_pretrained_model_name_or_path (`str`, *optional*):
Information necessary to initiate the image encoder. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co. An
......@@ -272,7 +272,7 @@ class VisionEncoderDecoderModel(PreTrainedModel):
`config` argument. This loading path is slower than converting the TensorFlow checkpoint in a
PyTorch model using the provided conversion scripts and loading the PyTorch model afterwards.
decoder_pretrained_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
decoder_pretrained_model_name_or_path (`str`, *optional*, defaults to `None`):
Information necessary to initiate the text decoder. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
......
......@@ -403,7 +403,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
) -> FlaxPreTrainedModel:
"""
Params:
vision_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
vision_model_name_or_path (`str`, *optional*, defaults to `None`):
Information necessary to initiate the vision model. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
......@@ -416,7 +416,7 @@ class FlaxVisionTextDualEncoderModel(FlaxPreTrainedModel):
loading path is slower than converting the PyTorch checkpoint in a Flax model using the provided
conversion scripts and loading the Flax model afterwards.
text_model_name_or_path (:obj: *str*, *optional*):
text_model_name_or_path (`str`, *optional*):
Information necessary to initiate the text model. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
......
......@@ -404,7 +404,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
) -> PreTrainedModel:
"""
Params:
vision_model_name_or_path (:obj: *str*, *optional*, defaults to `None`):
vision_model_name_or_path (`str`, *optional*, defaults to `None`):
Information necessary to initiate the vision model. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
......@@ -417,7 +417,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
loading path is slower than converting the PyTorch checkpoint in a Flax model using the provided
conversion scripts and loading the Flax model afterwards.
text_model_name_or_path (:obj: *str*, *optional*):
text_model_name_or_path (`str`, *optional*):
Information necessary to initiate the text model. Can be either:
- A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
......
......@@ -73,7 +73,7 @@ class Wav2Vec2Config(PretrainedConfig):
feat_extract_activation (`str, `optional`, defaults to `"gelu"`):
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
The dropout probabilitiy for quantized feature extractor states.
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
......
......@@ -72,7 +72,7 @@ class WavLMConfig(PretrainedConfig):
feat_extract_activation (`str, `optional`, defaults to `"gelu"`):
The non-linear activation function (function or string) in the 1D convolutional layers of the feature
extractor. If string, `"gelu"`, `"relu"`, `"selu"` and `"gelu_new"` are supported.
feat_quantizer_dropout (obj:*float*, *optional*, defaults to 0.0):
feat_quantizer_dropout (`float`, *optional*, defaults to 0.0):
The dropout probabilitiy for quantized feature extractor states.
conv_dim (`Tuple[int]`, *optional*, defaults to `(512, 512, 512, 512, 512, 512, 512)`):
A tuple of integers defining the number of input and output channels of each 1D convolutional layer in the
......
......@@ -512,15 +512,15 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
curr_out = curr_out[: self.reuse_len]
if self.mem_len is None or self.mem_len == 0:
# If :obj:`use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
# If `use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
# and returns all of the past and current hidden states.
cutoff = 0
else:
# If :obj:`use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
# If `use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
# states. This is the preferred setting for training and long-form generation.
cutoff = -self.mem_len
if prev_mem is None:
# if :obj:`use_mems` is active and `mem_len` is defined, the model
# if `use_mems` is active and `mem_len` is defined, the model
new_mem = curr_out[cutoff:]
else:
new_mem = tf.concat([prev_mem, curr_out], 0)[cutoff:]
......
......@@ -1000,15 +1000,15 @@ class XLNetModel(XLNetPreTrainedModel):
curr_out = curr_out[: self.reuse_len]
if self.mem_len is None or self.mem_len == 0:
# If :obj:`use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
# If `use_mems` is active but no `mem_len` is defined, the model behaves like GPT-2 at inference time
# and returns all of the past and current hidden states.
cutoff = 0
else:
# If :obj:`use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
# If `use_mems` is active and `mem_len` is defined, the model returns the last `mem_len` hidden
# states. This is the preferred setting for training and long-form generation.
cutoff = -self.mem_len
if prev_mem is None:
# if :obj:`use_mems` is active and `mem_len` is defined, the model
# if `use_mems` is active and `mem_len` is defined, the model
new_mem = curr_out[cutoff:]
else:
new_mem = torch.cat([prev_mem, curr_out], dim=0)[cutoff:]
......
......@@ -2466,7 +2466,7 @@ class Trainer:
ignore_keys: Optional[List[str]] = None,
) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on `model` using obj:*inputs*.
Perform an evaluation step on `model` using `inputs`.
Subclass and override to inject custom behavior.
......
......@@ -226,8 +226,8 @@ def torch_distributed_zero_first(local_rank: int):
class DistributedSamplerWithLoop(DistributedSampler):
"""
Like a :obj:torch.utils.data.distributed.DistributedSampler` but loops at the end back to the beginning of the
shuffled samples to make each process have a round multiple of batch_size samples.
Like a torch.utils.data.distributed.DistributedSampler` but loops at the end back to the beginning of the shuffled
samples to make each process have a round multiple of batch_size samples.
Args:
dataset (`torch.utils.data.Dataset`):
......
......@@ -126,7 +126,7 @@ class Seq2SeqTrainer(Trainer):
ignore_keys: Optional[List[str]] = None,
) -> Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]:
"""
Perform an evaluation step on `model` using obj:*inputs*.
Perform an evaluation step on `model` using `inputs`.
Subclass and override to inject custom behavior.
......
......@@ -175,8 +175,8 @@ class TrainingArguments:
logging_steps (`int`, *optional*, defaults to 500):
Number of update steps between two logs if `logging_strategy="steps"`.
logging_nan_inf_filter (`bool`, *optional*, defaults to `True`):
Whether to filter `nan` and `inf` losses for logging. If set to obj:`True` the loss of every step that is
`nan` or `inf` is filtered and the average loss of the current logging window is taken instead.
Whether to filter `nan` and `inf` losses for logging. If set to `True` the loss of every step that is `nan`
or `inf` is filtered and the average loss of the current logging window is taken instead.
<Tip>
......
......@@ -45,11 +45,11 @@ class TestCodeExamples(unittest.TestCase):
the doctests in those files
Args:
directory (:obj:`Path`): Directory containing the files
identifier (:obj:`str`): Will parse files containing this
ignore_files (:obj:`List[str]`): List of files to skip
n_identifier (:obj:`str` or :obj:`List[str]`): Will not parse files containing this/these identifiers.
only_modules (:obj:`bool`): Whether to only analyze modules
directory (`Path`): Directory containing the files
identifier (`str`): Will parse files containing this
ignore_files (`List[str]`): List of files to skip
n_identifier (`str` or `List[str]`): Will not parse files containing this/these identifiers.
only_modules (`bool`): Whether to only analyze modules
"""
files = [file for file in os.listdir(directory) if os.path.isfile(os.path.join(directory, file))]
......
......@@ -556,7 +556,7 @@ class XLNetModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase)
self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)
def test_xlnet_base_model_use_mems(self):
# checking that in auto-regressive mode, :obj:`use_mems` gives the same results
# checking that in auto-regressive mode, `use_mems` gives the same results
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlnet_model_use_mems(*config_and_inputs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment