"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "3cff4cc58730409c68f8afa2f3b9c61efa0e85c6"
Unverified commit 91c2278b, authored by amyeroberts and committed by GitHub

Update modeling doc strings FE -> IP (#21106)

* Update docs examples FE -> IP

* Remove _IMAGE_PROCESSOR_FOR_DOC
parent 5d3cb760
@@ -857,7 +857,7 @@ GROUPVIT_VISION_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
             Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-            [`CLIPFeatureExtractor`]. See [`CLIPFeatureExtractor.__call__`] for details.
+            [`CLIPImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
         output_attentions (`bool`, *optional*):
             Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
             tensors for more detail.
@@ -891,8 +891,8 @@ GROUPVIT_INPUTS_DOCSTRING = r"""
             [What are position IDs?](../glossary#position-ids)
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`CLIPFeatureExtractor`]. See
-            [`CLIPFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`CLIPImageProcessor`]. See
+            [`CLIPImageProcessor.__call__`] for details.
         return_loss (`bool`, *optional*):
             Whether or not to return the contrastive loss.
         output_attentions (`bool`, *optional*):
...
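
Both GroupViT hunks now point readers at [`CLIPImageProcessor`] rather than [`CLIPFeatureExtractor`]. A minimal sketch of the pattern the updated docstrings describe, where the image processor produces the `pixel_values` tensor the model consumes (the checkpoint name and image URL are assumptions for illustration, not part of this diff):

from PIL import Image
import requests
from transformers import CLIPImageProcessor, GroupViTVisionModel

# Assumed checkpoint for illustration; any GroupViT checkpoint with a saved
# preprocessing config should work the same way.
image_processor = CLIPImageProcessor.from_pretrained("nvidia/groupvit-gcc-yfcc")
model = GroupViTVisionModel.from_pretrained("nvidia/groupvit-gcc-yfcc")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# The image processor resizes and normalizes the image and returns `pixel_values`.
inputs = image_processor(images=image, return_tensors="pt")
outputs = model(**inputs)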
@@ -1555,8 +1555,8 @@ GROUPVIT_TEXT_INPUTS_DOCSTRING = r"""
 GROUPVIT_VISION_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`CLIPFeatureExtractor`]. See
-            [`CLIPFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`CLIPImageProcessor`]. See
+            [`CLIPImageProcessor.__call__`] for details.
         output_attentions (`bool`, *optional*):
             Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
             tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
@@ -1583,8 +1583,8 @@ GROUPVIT_INPUTS_DOCSTRING = r"""
             [What are input IDs?](../glossary#input-ids)
         pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]` `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`CLIPFeatureExtractor`]. See
-            [`CLIPFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`CLIPImageProcessor`]. See
+            [`CLIPImageProcessor.__call__`] for details.
         attention_mask (`np.ndarray` or `tf.Tensor` of shape `({0})`, *optional*):
             Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
...
@@ -38,7 +38,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "LevitConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "LevitImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "facebook/levit-128S"
@@ -549,7 +548,6 @@ class LevitModel(LevitPreTrainedModel):
     @add_start_docstrings_to_model_forward(LEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -618,7 +616,6 @@ class LevitForImageClassification(LevitPreTrainedModel):
     @add_start_docstrings_to_model_forward(LEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -711,7 +708,6 @@ class LevitForImageClassificationWithTeacher(LevitPreTrainedModel):
     @add_start_docstrings_to_model_forward(LEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=LevitForImageClassificationWithTeacherOutput,
         config_class=_CONFIG_FOR_DOC,
...
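
With `processor_class` dropped from the `@add_code_sample_docstrings` decorators, the doc-sample machinery no longer needs a model-specific preprocessing constant. A rough, assumed sketch of the user-facing pattern those samples document for `LevitForImageClassification`, using the `facebook/levit-128S` checkpoint named above (the image URL is illustrative only):

from PIL import Image
import requests
import torch
from transformers import AutoImageProcessor, LevitForImageClassification

# Preprocessing is loaded generically from the checkpoint rather than through
# a hard-coded feature-extractor class.
image_processor = AutoImageProcessor.from_pretrained("facebook/levit-128S")
model = LevitForImageClassification.from_pretrained("facebook/levit-128S")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits

predicted_class = logits.argmax(-1).item()
print(model.config.id2label[predicted_class])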
@@ -51,7 +51,6 @@ logger = logging.get_logger(__name__)
 _CONFIG_FOR_DOC = "MaskFormerConfig"
 _CHECKPOINT_FOR_DOC = "facebook/maskformer-swin-base-ade"
-_FEAT_EXTRACTOR_FOR_DOC = "MaskFormerImageProcessor"
 MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "facebook/maskformer-swin-base-ade",
...
@@ -33,7 +33,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "MobileNetV1Config"
-_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV1ImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "google/mobilenet_v1_1.0_224"
@@ -355,7 +354,6 @@ class MobileNetV1Model(MobileNetV1PreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILENET_V1_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -428,7 +426,6 @@ class MobileNetV1ForImageClassification(MobileNetV1PreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILENET_V1_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -43,7 +43,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "MobileNetV2Config"
-_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV2ImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "google/mobilenet_v2_1.0_224"
@@ -566,7 +565,6 @@ class MobileNetV2Model(MobileNetV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILENET_V2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -639,8 +637,6 @@ class MobileNetV2ForImageClassification(MobileNetV2PreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILENET_V2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
-        checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
         expected_output=_IMAGE_CLASS_EXPECTED_OUTPUT,
...
@@ -49,7 +49,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "MobileViTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
@@ -745,7 +744,6 @@ class MobileViTModel(MobileViTPreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -819,7 +817,6 @@ class MobileViTForImageClassification(MobileViTPreTrainedModel):
     @add_start_docstrings_to_model_forward(MOBILEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -43,7 +43,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "MobileViTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
@@ -839,7 +838,6 @@ class TFMobileViTModel(TFMobileViTPreTrainedModel):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(MOBILEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
@@ -889,7 +887,6 @@ class TFMobileViTForImageClassification(TFMobileViTPreTrainedModel, TFSequenceClassificationLoss):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(MOBILEVIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -57,7 +57,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "NatConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "shi-labs/nat-mini-in1k-224"
@@ -708,7 +707,6 @@ class NatModel(NatPreTrainedModel):
     @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=NatModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -788,7 +786,6 @@ class NatForImageClassification(NatPreTrainedModel):
     @add_start_docstrings_to_model_forward(NAT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=NatImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -204,7 +204,7 @@ class OwlViTObjectDetectionOutput(ModelOutput):
         pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_patches, 4)`):
             Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
             values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-            possible padding). You can use [`~OwlViTFeatureExtractor.post_process_object_detection`] to retrieve the
+            possible padding). You can use [`~OwlViTImageProcessor.post_process_object_detection`] to retrieve the
             unnormalized bounding boxes.
         text_embeds (`torch.FloatTensor` of shape `(batch_size, num_max_text_queries, output_dim`):
             The text embeddings obtained by applying the projection layer to the pooled output of [`OwlViTTextModel`].
@@ -248,12 +248,12 @@ class OwlViTImageGuidedObjectDetectionOutput(ModelOutput):
         target_pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_patches, 4)`):
             Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
             values are normalized in [0, 1], relative to the size of each individual target image in the batch
-            (disregarding possible padding). You can use [`~OwlViTFeatureExtractor.post_process_object_detection`] to
+            (disregarding possible padding). You can use [`~OwlViTImageProcessor.post_process_object_detection`] to
             retrieve the unnormalized bounding boxes.
         query_pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_patches, 4)`):
             Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
             values are normalized in [0, 1], relative to the size of each individual query image in the batch
-            (disregarding possible padding). You can use [`~OwlViTFeatureExtractor.post_process_object_detection`] to
+            (disregarding possible padding). You can use [`~OwlViTImageProcessor.post_process_object_detection`] to
             retrieve the unnormalized bounding boxes.
         image_embeds (`torch.FloatTensor` of shape `(batch_size, patch_size, patch_size, output_dim`):
             Pooled output of [`OwlViTVisionModel`]. OWL-ViT represents images as a set of image patches and computes
...
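
The OWL-ViT output docstrings now send readers to [`~OwlViTImageProcessor.post_process_object_detection`] to unnormalize the predicted boxes. A hedged sketch of that flow (the checkpoint, text queries, and image URL are assumptions for illustration):

from PIL import Image
import requests
import torch
from transformers import OwlViTProcessor, OwlViTImageProcessor, OwlViTForObjectDetection

checkpoint = "google/owlvit-base-patch32"  # assumed checkpoint
processor = OwlViTProcessor.from_pretrained(checkpoint)
image_processor = OwlViTImageProcessor.from_pretrained(checkpoint)
model = OwlViTForObjectDetection.from_pretrained(checkpoint)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
texts = [["a photo of a cat", "a photo of a dog"]]

inputs = processor(text=texts, images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# target_sizes carries (height, width) per image so the normalized boxes in
# `pred_boxes` can be mapped back to pixel coordinates.
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(
    outputs, threshold=0.1, target_sizes=target_sizes
)
for score, label, box in zip(results[0]["scores"], results[0]["labels"], results[0]["boxes"]):
    print(f"{texts[0][label]}: {score:.2f} at {[round(c, 1) for c in box.tolist()]}")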
@@ -34,7 +34,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "PoolFormerConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "PoolFormerImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "sail/poolformer_s12"
@@ -326,7 +325,6 @@ class PoolFormerModel(PoolFormerPreTrainedModel):
     @add_start_docstrings_to_model_forward(POOLFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -397,7 +395,6 @@ class PoolFormerForImageClassification(PoolFormerPreTrainedModel):
     @add_start_docstrings_to_model_forward(POOLFORMER_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -37,7 +37,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "RegNetConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "facebook/regnet-y-040"
@@ -341,7 +340,6 @@ class RegNetModel(RegNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(REGNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -399,7 +397,6 @@ class RegNetForImageClassification(RegNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(REGNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -35,7 +35,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "RegNetConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "facebook/regnet-y-040"
@@ -411,7 +410,6 @@ class TFRegNetModel(TFRegNetPreTrainedModel):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(REGNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -477,7 +475,6 @@ class TFRegNetForImageClassification(TFRegNetPreTrainedModel, TFSequenceClassificationLoss):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(REGNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -43,7 +43,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "ResNetConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/resnet-50"
@@ -312,7 +311,6 @@ class ResNetModel(ResNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -369,7 +367,6 @@ class ResNetForImageClassification(ResNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -34,7 +34,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "ResNetConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/resnet-50"
@@ -393,7 +392,6 @@ class TFResNetModel(TFResNetPreTrainedModel):
     @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -458,7 +456,6 @@ class TFResNetForImageClassification(TFResNetPreTrainedModel, TFSequenceClassificationLoss):
     @add_start_docstrings_to_model_forward(RESNET_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
...
@@ -42,7 +42,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "SegformerConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "SegformerImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "nvidia/mit-b0"
@@ -529,7 +528,6 @@ class SegformerModel(SegformerPreTrainedModel):
     @add_start_docstrings_to_model_forward(SEGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -589,7 +587,6 @@ class SegformerForImageClassification(SegformerPreTrainedModel):
     @add_start_docstrings_to_model_forward(SEGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=SegFormerImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -37,7 +37,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "SegformerConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "SegformerImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "nvidia/mit-b0"
@@ -606,7 +605,6 @@ class TFSegformerModel(TFSegformerPreTrainedModel):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(SEGFORMER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -659,7 +657,6 @@ class TFSegformerForImageClassification(TFSegformerPreTrainedModel, TFSequenceClassificationLoss):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(SEGFORMER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -44,7 +44,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "SwinConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/swin-tiny-patch4-window7-224"
@@ -963,7 +962,6 @@ class SwinModel(SwinPreTrainedModel):
     @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=SwinModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1168,7 +1166,6 @@ class SwinForImageClassification(SwinPreTrainedModel):
     @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=SwinImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -47,7 +47,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "SwinConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/swin-tiny-patch4-window7-224"
@@ -1192,7 +1191,6 @@ class TFSwinModel(TFSwinPreTrainedModel):
     @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFSwinModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1429,7 +1427,6 @@ class TFSwinForImageClassification(TFSwinPreTrainedModel, TFSequenceClassificationLoss):
     @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFSwinImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
...
@@ -43,7 +43,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "Swin2SRConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "caidas/swin2SR-classical-sr-x2-64"
@@ -823,8 +822,8 @@ SWIN2SR_START_DOCSTRING = r"""
 SWIN2SR_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-            [`AutoFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+            [`AutoImageProcessor.__call__`] for details.
         head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
             Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -897,7 +896,6 @@ class Swin2SRModel(Swin2SRPreTrainedModel):
     @add_start_docstrings_to_model_forward(SWIN2SR_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutput,
         config_class=_CONFIG_FOR_DOC,
...
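
Finally, the Swin2SR inputs docstring now names [`AutoImageProcessor`] as the source of `pixel_values`. A short, assumed sketch of that flow with the `caidas/swin2SR-classical-sr-x2-64` checkpoint from the diff (the image URL is illustrative only):

from PIL import Image
import requests
import torch
from transformers import AutoImageProcessor, Swin2SRModel

image_processor = AutoImageProcessor.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
model = Swin2SRModel.from_pretrained("caidas/swin2SR-classical-sr-x2-64")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# AutoImageProcessor resolves the checkpoint's image-processor class and
# produces the `pixel_values` described in SWIN2SR_INPUTS_DOCSTRING.
inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.last_hidden_state.shape)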