"...wan/git@developer.sourcefind.cn:Wenxuan/LightX2V.git" did not exist on "486e62798d310b59ea040aecb8e50ccf4943139e"
Unverified commit 17a7b49b, authored by amyeroberts, committed by GitHub

Update doc examples feature extractor -> image processor (#20501)

* Update doc example feature extractor -> image processor

* Apply suggestions from code review
parent afad0c18
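The pattern applied across every file below is the same: doc examples and docstrings that referenced `AutoFeatureExtractor` now reference `AutoImageProcessor`. As a quick orientation, here is a minimal before/after sketch of that swap; the `facebook/vit-mae-base` checkpoint is taken from the examples in this diff.

```python
# Minimal sketch of the swap this commit applies to the vision doc examples.
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Before: the feature extractor entry point.
# from transformers import AutoFeatureExtractor
# feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
# inputs = feature_extractor(images=image, return_tensors="pt")

# After: the image processor entry point, with the same call signature for images.
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
inputs = image_processor(images=image, return_tensors="pt")
```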
@@ -612,8 +612,8 @@ VIT_MAE_START_DOCSTRING = r"""
VIT_MAE_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -677,17 +677,17 @@ class ViTMAEModel(ViTMAEPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMAEModel
+>>> from transformers import AutoImageProcessor, ViTMAEModel
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEModel.from_pretrained("facebook/vit-mae-base")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""

@@ -978,17 +978,17 @@ class ViTMAEForPreTraining(ViTMAEPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMAEForPreTraining
+>>> from transformers import AutoImageProcessor, ViTMAEForPreTraining
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEForPreTraining.from_pretrained("facebook/vit-mae-base")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> loss = outputs.loss
>>> mask = outputs.mask
@@ -464,8 +464,8 @@ VIT_MSN_START_DOCSTRING = r"""
VIT_MSN_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -532,7 +532,7 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMSNModel
+>>> from transformers import AutoImageProcessor, ViTMSNModel
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -540,9 +540,9 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNModel.from_pretrained("facebook/vit-msn-small")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state

@@ -627,7 +627,7 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMSNForImageClassification
+>>> from transformers import AutoImageProcessor, ViTMSNForImageClassification
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -637,10 +637,10 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNForImageClassification.from_pretrained("facebook/vit-msn-small")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     logits = model(**inputs).logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -53,7 +53,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "YolosConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "YolosFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "YolosImageProcessor"

# Base docstring
_CHECKPOINT_FOR_DOC = "hustvl/yolos-small"

@@ -83,7 +83,7 @@ class YolosObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process`] to retrieve the unnormalized bounding
+possible padding). You can use [`~DetrImageProcessor.post_process`] to retrieve the unnormalized bounding
boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
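As an aside on the box format described in this docstring: predictions are normalized `(center_x, center_y, width, height)` tuples, and the post-processing helpers turn them into absolute corner coordinates. Below is an illustrative sketch of that conversion; the helper name is made up for the example, and in practice you would call the image processor's post-processing method instead.

```python
import torch


def center_to_corners(boxes: torch.Tensor, image_height: int, image_width: int) -> torch.Tensor:
    """Convert normalized (cx, cy, w, h) boxes in [0, 1] to absolute (xmin, ymin, xmax, ymax).

    Illustrative only; use e.g. YolosImageProcessor.post_process_object_detection in practice.
    """
    cx, cy, w, h = boxes.unbind(-1)
    xmin = (cx - 0.5 * w) * image_width
    ymin = (cy - 0.5 * h) * image_height
    xmax = (cx + 0.5 * w) * image_width
    ymax = (cy + 0.5 * h) * image_height
    return torch.stack([xmin, ymin, xmax, ymax], dim=-1)


# Example: one normalized box on a 480x640 (height x width) image.
boxes = torch.tensor([[0.5, 0.5, 0.2, 0.4]])
print(center_to_corners(boxes, image_height=480, image_width=640))
# tensor([[256., 144., 384., 336.]])
```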
@@ -573,8 +573,8 @@ YOLOS_START_DOCSTRING = r"""
YOLOS_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -756,7 +756,7 @@ class YolosForObjectDetection(YolosPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
+>>> from transformers import AutoImageProcessor, AutoModelForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -764,17 +764,17 @@ class YolosForObjectDetection(YolosPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
+>>> image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
>>> model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)

>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-...     outputs, threshold=0.9, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
+...     0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
@@ -186,14 +186,14 @@ wish, as it will appear on the Model Hub. Do not forget to include the organisat
Then you will have to say whether your model re-uses the same processing classes as the model you're cloning:

```
-Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor)
+Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor/XxxImageProcessor)
```

Answer yes if you have no intentions to make any change to the class used for preprocessing. It can use different
files (for instance you can reuse the `BertTokenizer` with a new vocab file).
If you answer no, you will have to give the name of the classes
-for the new tokenizer/feature extractor/processor (depending on the model you're cloning).
+for the new tokenizer/image processor/feature extractor/processor (depending on the model you're cloning).
Next the questionnaire will ask
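As a small, hypothetical illustration of the "answer yes and reuse the class with different files" case mentioned in the doc text above (the vocab path is a placeholder, not part of the original docs):

```python
from transformers import BertTokenizer

# Reuse the existing BertTokenizer class, pointing it at a new vocabulary file.
# "new_model/vocab.txt" stands in for whatever WordPiece vocab you supply.
tokenizer = BertTokenizer(vocab_file="new_model/vocab.txt")
print(tokenizer.tokenize("hello world"))
```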