"...wan/git@developer.sourcefind.cn:Wenxuan/LightX2V.git" did not exist on "486e62798d310b59ea040aecb8e50ccf4943139e"
Unverified commit 17a7b49b, authored by amyeroberts, committed by GitHub

Update doc examples feature extractor -> image processor (#20501)

* Update doc example feature extractor -> image processor

* Apply suggestions from code review
parent afad0c18
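The pattern applied across every file below is the same: doc examples and docstrings that referenced `AutoFeatureExtractor` now reference `AutoImageProcessor`. As a quick orientation, here is a minimal before/after sketch of that swap; the `facebook/vit-mae-base` checkpoint is taken from the examples in this diff.

```python
# Minimal sketch of the swap this commit applies to the vision doc examples.
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Before: the feature extractor entry point.
# from transformers import AutoFeatureExtractor
# feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
# inputs = feature_extractor(images=image, return_tensors="pt")

# After: the image processor entry point, with the same call signature for images.
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
inputs = image_processor(images=image, return_tensors="pt")
```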
@@ -612,8 +612,8 @@ VIT_MAE_START_DOCSTRING = r"""
VIT_MAE_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -677,17 +677,17 @@ class ViTMAEModel(ViTMAEPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMAEModel
+>>> from transformers import AutoImageProcessor, ViTMAEModel
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEModel.from_pretrained("facebook/vit-mae-base")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""

@@ -978,17 +978,17 @@ class ViTMAEForPreTraining(ViTMAEPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMAEForPreTraining
+>>> from transformers import AutoImageProcessor, ViTMAEForPreTraining
>>> from PIL import Image
>>> import requests

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-mae-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-mae-base")
>>> model = ViTMAEForPreTraining.from_pretrained("facebook/vit-mae-base")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> loss = outputs.loss
>>> mask = outputs.mask
@@ -464,8 +464,8 @@ VIT_MSN_START_DOCSTRING = r"""
VIT_MSN_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -532,7 +532,7 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMSNModel
+>>> from transformers import AutoImageProcessor, ViTMSNModel
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -540,9 +540,9 @@ class ViTMSNModel(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNModel.from_pretrained("facebook/vit-msn-small")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state

@@ -627,7 +627,7 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, ViTMSNForImageClassification
+>>> from transformers import AutoImageProcessor, ViTMSNForImageClassification
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -637,10 +637,10 @@ class ViTMSNForImageClassification(ViTMSNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/vit-msn-small")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/vit-msn-small")
>>> model = ViTMSNForImageClassification.from_pretrained("facebook/vit-msn-small")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     logits = model(**inputs).logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -53,7 +53,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "YolosConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "YolosFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "YolosImageProcessor"

# Base docstring
_CHECKPOINT_FOR_DOC = "hustvl/yolos-small"

@@ -83,7 +83,7 @@ class YolosObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process`] to retrieve the unnormalized bounding
+possible padding). You can use [`~DetrImageProcessor.post_process`] to retrieve the unnormalized bounding
boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
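As an aside on the box format described in this docstring: predictions are normalized `(center_x, center_y, width, height)` tuples, and the post-processing helpers turn them into absolute corner coordinates. Below is an illustrative sketch of that conversion; the helper name is made up for the example, and in practice you would call the image processor's post-processing method instead.

```python
import torch


def center_to_corners(boxes: torch.Tensor, image_height: int, image_width: int) -> torch.Tensor:
    """Convert normalized (cx, cy, w, h) boxes in [0, 1] to absolute (xmin, ymin, xmax, ymax).

    Illustrative only; use e.g. YolosImageProcessor.post_process_object_detection in practice.
    """
    cx, cy, w, h = boxes.unbind(-1)
    xmin = (cx - 0.5 * w) * image_width
    ymin = (cy - 0.5 * h) * image_height
    xmax = (cx + 0.5 * w) * image_width
    ymax = (cy + 0.5 * h) * image_height
    return torch.stack([xmin, ymin, xmax, ymax], dim=-1)


# Example: one normalized box on a 480x640 (height x width) image.
boxes = torch.tensor([[0.5, 0.5, 0.2, 0.4]])
print(center_to_corners(boxes, image_height=480, image_width=640))
# tensor([[256., 144., 384., 336.]])
```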
@@ -573,8 +573,8 @@ YOLOS_START_DOCSTRING = r"""
YOLOS_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

@@ -756,7 +756,7 @@ class YolosForObjectDetection(YolosPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, AutoModelForObjectDetection
+>>> from transformers import AutoImageProcessor, AutoModelForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests

@@ -764,17 +764,17 @@ class YolosForObjectDetection(YolosPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)

->>> feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-tiny")
+>>> image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-tiny")
>>> model = AutoModelForObjectDetection.from_pretrained("hustvl/yolos-tiny")

->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)

>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-...     outputs, threshold=0.9, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
+...     0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
@@ -186,14 +186,14 @@ wish, as it will appear on the Model Hub. Do not forget to include the organisat
Then you will have to say whether your model re-uses the same processing classes as the model you're cloning:

```
-Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor)
+Will your new model use the same processing class as Xxx (XxxTokenizer/XxxFeatureExtractor/XxxImageProcessor)
```

Answer yes if you have no intentions to make any change to the class used for preprocessing. It can use different
files (for instance you can reuse the `BertTokenizer` with a new vocab file).
If you answer no, you will have to give the name of the classes
-for the new tokenizer/feature extractor/processor (depending on the model you're cloning).
+for the new tokenizer/image processor/feature extractor/processor (depending on the model you're cloning).
Next the questionnaire will ask
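As a small, hypothetical illustration of the "answer yes and reuse the class with different files" case mentioned in the doc text above (the vocab path is a placeholder, not part of the original docs):

```python
from transformers import BertTokenizer

# Reuse the existing BertTokenizer class, pointing it at a new vocabulary file.
# "new_model/vocab.txt" stands in for whatever WordPiece vocab you supply.
tokenizer = BertTokenizer(vocab_file="new_model/vocab.txt")
print(tokenizer.tokenize("hello world"))
```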