Unverified Commit 91c2278b authored by amyeroberts, committed by GitHub

Update modeling doc strings FE -> IP (#21106)

* Update docs examples FE -> IP

* Remove _IMAGE_PROCESSOR_FOR_DOC
parent 5d3cb760
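The rename this commit rolls through the docstrings reflects the library-wide move from vision feature extractors to image processors; from the user side the migration is just a class swap. A minimal sketch of the pattern, assuming the `datasets` library is installed (the checkpoint and dataset names below are taken from docstrings touched in this diff, not prescribed by it):

from datasets import load_dataset
from transformers import AutoImageProcessor

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

# Before this migration one would write: AutoFeatureExtractor.from_pretrained(...)
image_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
inputs = image_processor(image, return_tensors="pt")
print(inputs["pixel_values"].shape)  # e.g. torch.Size([1, 3, 224, 224]) for this checkpoint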
@@ -43,7 +43,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "Swinv2Config"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "microsoft/swinv2-tiny-patch4-window8-256"
@@ -1043,7 +1042,6 @@ class Swinv2Model(Swinv2PreTrainedModel):
     @add_start_docstrings_to_model_forward(SWINV2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=Swinv2ModelOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -1251,7 +1249,6 @@ class Swinv2ForImageClassification(Swinv2PreTrainedModel):
     @add_start_docstrings_to_model_forward(SWINV2_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=Swinv2ImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
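With `processor_class` dropped from the Swinv2 decorators above, the generated code samples identify the processor through `AutoImageProcessor`. A hedged sketch of the equivalent hand-written usage (not the literal rendered docstring), using the base checkpoint named in this hunk:

import torch
from datasets import load_dataset
from transformers import AutoImageProcessor, Swinv2Model

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = AutoImageProcessor.from_pretrained("microsoft/swinv2-tiny-patch4-window8-256")
model = Swinv2Model.from_pretrained("microsoft/swinv2-tiny-patch4-window8-256")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
print(outputs.last_hidden_state.shape)  # (batch_size, sequence_length, hidden_size)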
@@ -38,7 +38,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "VanConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "Visual-Attention-Network/van-base"
@@ -435,7 +434,6 @@ class VanModel(VanPreTrainedModel):
     @add_start_docstrings_to_model_forward(VAN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPoolingAndNoAttention,
         config_class=_CONFIG_FOR_DOC,
@@ -493,7 +491,6 @@ class VanForImageClassification(VanPreTrainedModel):
     @add_start_docstrings_to_model_forward(VAN_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutputWithNoAttention,
         config_class=_CONFIG_FOR_DOC,
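The VAN classification head follows the same pattern. A sketch of the classification path, assuming `Visual-Attention-Network/van-base` (the base checkpoint from this hunk) also serves as the image-classification checkpoint:

import torch
from datasets import load_dataset
from transformers import AutoImageProcessor, VanForImageClassification

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = AutoImageProcessor.from_pretrained("Visual-Attention-Network/van-base")
model = VanForImageClassification.from_pretrained("Visual-Attention-Network/van-base")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
predicted_label = logits.argmax(-1).item()
print(model.config.id2label[predicted_label])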
@@ -635,8 +635,8 @@ VILT_INPUTS_DOCSTRING = r"""
             [What are token type IDs?](../glossary#token-type-ids)
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`ViltFeatureExtractor`]. See
-            [`ViltFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`ViltImageProcessor`]. See
+            [`ViltImageProcessor.__call__`] for details.
         pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
             Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -690,8 +690,8 @@ VILT_IMAGES_AND_TEXT_CLASSIFICATION_INPUTS_DOCSTRING = r"""
             [What are token type IDs?](../glossary#token-type-ids)
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_images, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`ViltFeatureExtractor`]. See
-            [`ViltFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`ViltImageProcessor`]. See
+            [`ViltImageProcessor.__call__`] for details.
         pixel_mask (`torch.LongTensor` of shape `(batch_size, num_images, height, width)`, *optional*):
             Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
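The ViLT docstrings above describe `pixel_values` and `pixel_mask`; both come out of the image processor call. A small sketch of that preprocessing step alone (the `dandelin/vilt-b32-mlm` checkpoint is an assumption for illustration, not named in this diff):

from datasets import load_dataset
from transformers import ViltImageProcessor

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = ViltImageProcessor.from_pretrained("dandelin/vilt-b32-mlm")
encoding = image_processor(image, return_tensors="pt")
# `pixel_values` has shape (batch_size, num_channels, height, width); a pixel mask marking
# real versus padded pixels is returned alongside it when padding is applied.
print({key: value.shape for key, value in encoding.items()})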
@@ -556,13 +556,13 @@ VISION_TEXT_DUAL_ENCODER_MODEL_DOCSTRING = r"""
     >>> from transformers import (
     ...     FlaxVisionTextDualEncoderModel,
     ...     VisionTextDualEncoderProcessor,
-    ...     ViTFeatureExtractor,
+    ...     ViTImageProcessor,
     ...     BertTokenizer,
     ... )
     >>> tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
-    >>> feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
-    >>> processor = VisionTextDualEncoderProcessor(feature_extractor, tokenizer)
+    >>> image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
+    >>> processor = VisionTextDualEncoderProcessor(image_processor, tokenizer)
     >>> model = FlaxVisionTextDualEncoderModel.from_vision_text_pretrained(
     ...     "google/vit-base-patch16-224", "bert-base-uncased"
     ... )
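The corrected Flax example above stops after constructing the model. A plausible continuation showing the processor and model in use (the caption strings and the cats-image dataset are illustrative, and the projection layers of a freshly combined dual encoder are randomly initialized):

from datasets import load_dataset
from transformers import (
    FlaxVisionTextDualEncoderModel,
    VisionTextDualEncoderProcessor,
    ViTImageProcessor,
    BertTokenizer,
)

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
processor = VisionTextDualEncoderProcessor(image_processor, tokenizer)
model = FlaxVisionTextDualEncoderModel.from_vision_text_pretrained(
    "google/vit-base-patch16-224", "bert-base-uncased"
)

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

inputs = processor(
    text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="np", padding=True
)
outputs = model(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    pixel_values=inputs.pixel_values,
)
logits_per_image = outputs.logits_per_image  # image-text similarity scores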
@@ -41,7 +41,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "ViTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "ViTImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "google/vit-base-patch16-224-in21k"
@@ -670,7 +669,6 @@ class TFViTModel(TFViTPreTrainedModel):
     @unpack_inputs
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=TFBaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
@@ -764,7 +762,6 @@ class TFViTForImageClassification(TFViTPreTrainedModel, TFSequenceClassification
     @unpack_inputs
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=TFSequenceClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
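For the TensorFlow ViT classes touched above, the rendered sample builds the processor from `ViTImageProcessor` (the value the removed constant held) and feeds TF tensors. A hedged sketch, assuming `google/vit-base-patch16-224` as the classification checkpoint (it appears elsewhere in this diff, but the `_IMAGE_CLASS_CHECKPOINT` value itself is not shown):

import tensorflow as tf
from datasets import load_dataset
from transformers import ViTImageProcessor, TFViTForImageClassification

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = ViTImageProcessor.from_pretrained("google/vit-base-patch16-224")
model = TFViTForImageClassification.from_pretrained("google/vit-base-patch16-224")

inputs = image_processor(image, return_tensors="tf")
logits = model(**inputs).logits

# model predicts one of the 1000 ImageNet classes
predicted_class = int(tf.math.argmax(logits, axis=-1)[0])
print(model.config.id2label[predicted_class])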
@@ -42,7 +42,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "ViTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "ViTImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "google/vit-base-patch16-224-in21k"
@@ -536,7 +535,6 @@ class ViTModel(ViTPreTrainedModel):
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
@@ -765,7 +763,6 @@ class ViTForImageClassification(ViTPreTrainedModel):
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -37,7 +37,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "ViTHybridConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "google/vit-hybrid-base-bit-384"
@@ -508,8 +507,8 @@ VIT_START_DOCSTRING = r"""
 VIT_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-            [`AutoFeatureExtractor.__call__`] for details.
+            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+            [`AutoImageProcessor.__call__`] for details.
         head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
             Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -560,7 +559,6 @@ class ViTHybridModel(ViTHybridPreTrainedModel):
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
@@ -664,7 +662,6 @@ class ViTHybridForImageClassification(ViTHybridPreTrainedModel):
     @add_start_docstrings_to_model_forward(VIT_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_IMAGE_CLASS_CHECKPOINT,
         output_type=ImageClassifierOutput,
         config_class=_CONFIG_FOR_DOC,
@@ -584,7 +584,7 @@ X_CLIP_VISION_INPUTS_DOCSTRING = r"""
     Args:
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
             Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-            [`CLIPFeatureExtractor`]. See [`CLIPFeatureExtractor.__call__`] for details.
+            [`CLIPImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
         output_attentions (`bool`, *optional*):
             Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
             tensors for more detail.
@@ -619,7 +619,7 @@ X_CLIP_INPUTS_DOCSTRING = r"""
             [What are position IDs?](../glossary#position-ids)
         pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
             Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-            [`CLIPFeatureExtractor`]. See [`CLIPFeatureExtractor.__call__`] for details.
+            [`CLIPImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
         return_loss (`bool`, *optional*):
             Whether or not to return the contrastive loss.
         output_attentions (`bool`, *optional*):
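The X-CLIP docstrings now point to `CLIPImageProcessor` for producing `pixel_values`. A minimal sketch of that preprocessing step alone, on a single frame (the `openai/clip-vit-base-patch32` checkpoint is an assumption; video-frame sampling and the X-CLIP model call itself are out of scope here):

from datasets import load_dataset
from transformers import CLIPImageProcessor

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
pixel_values = image_processor(image, return_tensors="pt").pixel_values
print(pixel_values.shape)  # (batch_size, num_channels, height, width), e.g. (1, 3, 224, 224)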
@@ -53,7 +53,6 @@ logger = logging.get_logger(__name__)
 # General docstring
 _CONFIG_FOR_DOC = "YolosConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "YolosImageProcessor"
 # Base docstring
 _CHECKPOINT_FOR_DOC = "hustvl/yolos-small"
@@ -627,7 +626,6 @@ class YolosModel(YolosPreTrainedModel):
     @add_start_docstrings_to_model_forward(YOLOS_INPUTS_DOCSTRING)
     @add_code_sample_docstrings(
-        processor_class=_FEAT_EXTRACTOR_FOR_DOC,
         checkpoint=_CHECKPOINT_FOR_DOC,
         output_type=BaseModelOutputWithPooling,
         config_class=_CONFIG_FOR_DOC,
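For YOLOS, the image processor named in the removed constant also carries the detection post-processing. A hedged sketch of end-to-end usage with the `hustvl/yolos-small` checkpoint from the hunk above (the post-processing call reflects the image-processor API of this era of the library):

import torch
from datasets import load_dataset
from transformers import YolosImageProcessor, YolosForObjectDetection

dataset = load_dataset("huggingface/cats-image")
image = dataset["test"]["image"][0]

image_processor = YolosImageProcessor.from_pretrained("hustvl/yolos-small")
model = YolosForObjectDetection.from_pretrained("hustvl/yolos-small")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

target_sizes = torch.tensor([image.size[::-1]])  # (height, width) of the original image
results = image_processor.post_process_object_detection(
    outputs, threshold=0.9, target_sizes=target_sizes
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(model.config.id2label[label.item()], round(score.item(), 3), box.tolist())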
@@ -861,10 +861,10 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
     >>> dataset = load_dataset("huggingface/cats-image")
     >>> image = dataset["test"]["image"][0]
-    >>> feature_extractor = {processor_class}.from_pretrained("{checkpoint}")
+    >>> image_processor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")
-    >>> inputs = feature_extractor(image, return_tensors="tf")
+    >>> inputs = image_processor(image, return_tensors="tf")
     >>> outputs = model(**inputs)
     >>> last_hidden_states = outputs.last_hidden_state
@@ -884,10 +884,10 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
     >>> dataset = load_dataset("huggingface/cats-image")
     >>> image = dataset["test"]["image"][0]
-    >>> feature_extractor = {processor_class}.from_pretrained("{checkpoint}")
+    >>> image_processor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")
-    >>> inputs = feature_extractor(image, return_tensors="tf")
+    >>> inputs = image_processor(image, return_tensors="tf")
     >>> logits = model(**inputs).logits
     >>> # model predicts one of the 1000 ImageNet classes