Unverified Commit 17a7b49b authored by amyeroberts, committed by GitHub

Update doc examples feature extractor -> image processor (#20501)

* Update doc example feature extractor -> image processor

* Apply suggestions from code review
parent afad0c18
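Every file touched below gets the same treatment: the doc examples stop instantiating an `AutoFeatureExtractor` and use an `AutoImageProcessor` instead, with the local variable renamed from `feature_extractor` to `image_processor`. As a minimal sketch of the new pattern (not copied from any single hunk; `facebook/vit-mae-base` is simply the checkpoint used in the first example below):

```python
# Minimal sketch of the renamed preprocessing entry point used throughout the
# updated examples. Any vision checkpoint with a registered image processor
# works; facebook/vit-mae-base is the one from the first hunk below.
from transformers import AutoImageProcessor
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
inputs = image_processor(images=image, return_tensors="pt")  # {"pixel_values": (1, 3, 224, 224) tensor}
```

The resulting `inputs` dict is passed to the model exactly as before; only the class and variable names change.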
@@ -612,8 +612,8 @@ VIT_MAE_START_DOCSTRING = r"""
VIT_MAE_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
- Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
- [`AutoFeatureExtractor.__call__`] for details.
+ Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+ [`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -677,17 +677,17 @@ class ViTMAEModel(ViTMAEPreTrainedModel):
Examples:
```python
- >>> from transformers import AutoFeatureExtractor, ViTMAEModel
+ >>> from transformers import AutoImageProcessor, ViTMAEModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
- >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEModel.from_pretrained("facebook/vit-mae-base")
- >>> inputs = feature_extractor(images=image, return_tensors="pt")
+ >>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -978,17 +978,17 @@ class ViTMAEForPreTraining(ViTMAEPreTrainedModel):
Examples:
```python
- >>> from transformers import AutoFeatureExtractor, ViTMAEForPreTraining
+ >>> from transformers import AutoImageProcessor, ViTMAEForPreTraining
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
- >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEForPreTraining.from_pretrained("facebook/vit-mae-base")
- >>> inputs = feature_extractor(images=image, return_tensors="pt")
+ >>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> loss = outputs.loss
>>> mask = outputs.mask
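Similarly, a minimal sketch (not part of the diff) of what the `ViTMAEForPreTraining` example above returns and how a reconstruction could be recovered. The `unpatchify` helper is an assumption based on the MAE implementation and may differ between versions:

```python
# Sketch only: interpreting the pre-training outputs from the example above.
# outputs.logits holds per-patch pixel predictions of shape
# (batch_size, num_patches, patch_size**2 * num_channels); outputs.mask marks
# masked patches with 1 and kept patches with 0.
print(loss.item())                                 # reconstruction loss over masked patches
print(mask.shape)                                  # (1, 196)
reconstruction = model.unpatchify(outputs.logits)  # assumed helper: back to (1, 3, 224, 224)
```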
@@ -464,8 +464,8 @@ VIT_MSN_START_DOCSTRING = r"""
VIT_MSN_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
- Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
- [`AutoFeatureExtractor.__call__`] for details.
+ Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+ [`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -532,7 +532,7 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
Examples:
```python
- >>> from transformers import AutoFeatureExtractor, ViTMSNModel
+ >>> from transformers import AutoImageProcessor, ViTMSNModel
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -540,9 +540,9 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
- >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNModel.from_pretrained("facebook/vit-msn-small")
- >>> inputs = feature_extractor(images=image, return_tensors="pt")
+ >>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
@@ -627,7 +627,7 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
Examples:
```python
- >>> from transformers import AutoFeatureExtractor, ViTMSNForImageClassification
+ >>> from transformers import AutoImageProcessor, ViTMSNForImageClassification
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -637,10 +637,10 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
- >>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+ >>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNForImageClassification.from_pretrained("facebook/vit-msn-small")
- >>> inputs = feature_extractor(images=image, return_tensors="pt")
+ >>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... logits = model(**inputs).logits
>>> # model predicts one of the 1000 ImageNet classes
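The classification example is cut off by the diff view; a minimal sketch of the step that typically follows, assuming the checkpoint's config carries an `id2label` mapping for the 1000 ImageNet classes:

```python
# Sketch only: turning the logits from the ViTMSNForImageClassification example
# above into a label. Assumes model.config.id2label is populated for ImageNet-1k.
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])
```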
@@ -53,7 +53,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "YolosConfig"
- _FEAT_EXTRACTOR_FOR_DOC = "YolosFeatureExtractor"
+ _FEAT_EXTRACTOR_FOR_DOC = "YolosImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "hustvl/yolos-small"
@@ -83,7 +83,7 @@ class YolosObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized box coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
- possible padding). You can use [`~DetrFeatureExtractor.post_process`] to retrieve the unnormalized bounding
+ possible padding). You can use [`~DetrImageProcessor.post_process`] to retrieve the unnormalized bounding
boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -573,8 +573,8 @@ YOLOS_START_DOCSTRING = r"""
YOLOS_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
- Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
- [`AutoFeatureExtractor.__call__`] for details.
+ Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+ [`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -756,7 +756,7 @@ class YolosForObjectDetection(YolosPreTrainedModel):
Examples:
```python
- >>> from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
+ >>> from transformers import AutoImageProcessor, AutoModelForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -764,17 +764,17 @@ class YolosForObjectDetection(YolosPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
- >>> feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
+ >>> image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
>>> model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")
- >>> inputs = feature_extractor(images=image, return_tensors="pt")
+ >>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
- >>> results = feature_extractor.post_process_object_detection(
- ...     outputs, threshold=0.9, target_sizes=target_sizes
- ... )[0]
+ >>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
+ ...     0
+ ... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
... box = [round(i, 2) for i in box.tolist()]
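The YOLOS example is likewise truncated after the first lines of the loop; a minimal sketch of how the post-processed detections are usually reported, again assuming `model.config.id2label` maps class indices to names:

```python
# Sketch only: printing the detections returned by post_process_object_detection
# in the YOLOS example above.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )
```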
@@ -186,14 +186,14 @@ wish, as it will appear on the Model Hub. Do not forget to include the organisat
Then you will have to say whether your model re-uses the same processing classes as the model you're cloning:
```
- Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor)
+ Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor/XxxImageProcessor)
```
Answer yes if you have no intention of changing the class used for preprocessing. The class can still use different
files (for instance, you can reuse the `BertTokenizer` with a new vocab file).
If you answer no, you will have to give the names of the classes
- for the new tokenizer/feature extractor/processor (depending on the model you're cloning).
+ for the new tokenizer/image processor/feature extractor/processor (depending on the model you're cloning).
Next the questionnaire will ask