Unverified Commit 17a7b49b authored by amyeroberts, committed by GitHub

Update doc examples feature extractor -> image processor (#20501)

* Update doc example feature extractor -> image processor

* Apply suggestions from code review
parent afad0c18
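
The change is mechanical throughout: every vision doc example swaps a `*FeatureExtractor` class for its `*ImageProcessor` equivalent, whose `__call__` takes the same `images=...` / `return_tensors=...` arguments. A minimal sketch of the new pattern (not part of the diff; it assumes a transformers release where `AutoImageProcessor` is available, and the checkpoint choice is illustrative):

```python
import requests
from PIL import Image

from transformers import AutoImageProcessor, AutoModelForImageClassification

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Before this commit the docs used AutoFeatureExtractor.from_pretrained(...);
# the image processor exposes the same preprocessing __call__ for vision inputs.
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
model = AutoModelForImageClassification.from_pretrained("facebook/convnext-tiny-224")

inputs = image_processor(images=image, return_tensors="pt")
logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])
```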
@@ -37,7 +37,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "ConvNextConfig"
_FEAT_EXTRACTOR_FOR_DOC = "ConvNextFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "ConvNextImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/convnext-tiny-224"
@@ -308,8 +308,8 @@ CONVNEXT_START_DOCSTRING = r"""
CONVNEXT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -432,8 +432,8 @@ CONVNEXT_START_DOCSTRING = r"""
CONVNEXT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`ConvNextFeatureExtractor`]. See
-[`ConvNextFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`ConvNextImageProcessor`]. See
+[`ConvNextImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -470,17 +470,17 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
Examples:
```python
->>> from transformers import ConvNextFeatureExtractor, TFConvNextModel
+>>> from transformers import ConvNextImageProcessor, TFConvNextModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
+>>> image_processor = ConvNextImageProcessor.from_pretrained("facebook/convnext-tiny-224")
>>> model = TFConvNextModel.from_pretrained("facebook/convnext-tiny-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -561,7 +561,7 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
Examples:
```python
->>> from transformers import ConvNextFeatureExtractor, TFConvNextForImageClassification
+>>> from transformers import ConvNextImageProcessor, TFConvNextForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -569,10 +569,10 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
+>>> image_processor = ConvNextImageProcessor.from_pretrained("facebook/convnext-tiny-224")
>>> model = TFConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -35,7 +35,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "CvtConfig"
_FEAT_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "microsoft/cvt-13"
@@ -573,8 +573,8 @@ CVT_START_DOCSTRING = r"""
CVT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`CvtFeatureExtractor`]. See
-[`CvtFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`CvtImageProcessor`]. See [`CvtImageProcessor.__call__`]
+for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
@@ -766,8 +766,8 @@ TFCVT_START_DOCSTRING = r"""
TFCVT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -808,17 +808,17 @@ class TFCvtModel(TFCvtPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFCvtModel
+>>> from transformers import AutoImageProcessor, TFCvtModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/cvt-13")
+>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/cvt-13")
>>> model = TFCvtModel.from_pretrained("microsoft/cvt-13")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -897,7 +897,7 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFCvtForImageClassification
+>>> from transformers import AutoImageProcessor, TFCvtForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -905,10 +905,10 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/cvt-13")
+>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/cvt-13")
>>> model = TFCvtForImageClassification.from_pretrained("microsoft/cvt-13")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -48,7 +48,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "Data2VecVisionConfig"
_FEAT_EXTRACTOR_FOR_DOC = "BeitFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "BeitImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/data2vec-vision-base"
@@ -606,8 +606,8 @@ DATA2VEC_VISION_START_DOCSTRING = r"""
DATA2VEC_VISION_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`BeitFeatureExtractor`]. See
-[`BeitFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`BeitImageProcessor`]. See
+[`BeitImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -1146,17 +1146,17 @@ class Data2VecVisionForSemanticSegmentation(Data2VecVisionPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, Data2VecVisionForSemanticSegmentation
+>>> from transformers import AutoImageProcessor, Data2VecVisionForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/data2vec-vision-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/data2vec-vision-base")
>>> model = Data2VecVisionForSemanticSegmentation.from_pretrained("facebook/data2vec-vision-base")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
@@ -53,7 +53,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "Data2VecVisionConfig"
_FEAT_EXTRACTOR_FOR_DOC = "BeitFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "BeitImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/data2vec-vision-base"
@@ -849,8 +849,8 @@ DATA2VEC_VISION_START_DOCSTRING = r"""
DATA2VEC_VISION_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`BeitFeatureExtractor`]. See
-[`BeitFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`BeitImageProcessor`]. See
+[`BeitImageProcessor.__call__`] for details.
head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -1397,17 +1397,17 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFData2VecVisionForSemanticSegmentation
+>>> from transformers import AutoImageProcessor, TFData2VecVisionForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/data2vec-vision-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/data2vec-vision-base")
>>> model = TFData2VecVisionForSemanticSegmentation.from_pretrained("facebook/data2vec-vision-base")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
@@ -241,7 +241,7 @@ class DeformableDetrObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized box coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~AutoFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~AutoImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -1073,8 +1073,7 @@ DEFORMABLE_DETR_INPUTS_DOCSTRING = r"""
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it.
-Pixel values can be obtained using [`AutoFeatureExtractor`]. See [`AutoFeatureExtractor.__call__`] for
-details.
+Pixel values can be obtained using [`AutoImageProcessor`]. See [`AutoImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -1603,17 +1602,17 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, DeformableDetrModel
+>>> from transformers import AutoImageProcessor, DeformableDetrModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("SenseTime/deformable-detr")
+>>> image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
>>> model = DeformableDetrModel.from_pretrained("SenseTime/deformable-detr")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
@@ -1873,24 +1872,24 @@ class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, DeformableDetrForObjectDetection
+>>> from transformers import AutoImageProcessor, DeformableDetrForObjectDetection
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("SenseTime/deformable-detr")
+>>> image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
>>> model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-...     outputs, threshold=0.5, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[
+...     0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
... box = [round(i, 2) for i in box.tolist()]
... print(
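# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the commit): the object-detection example
# above is cut off by the diff view at the `print(` call. A self-contained
# version of the renamed flow, with an illustrative loop body, assuming a
# transformers release that ships AutoImageProcessor:
import torch
import requests
from PIL import Image

from transformers import AutoImageProcessor, DeformableDetrForObjectDetection

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr")

inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# Rescale the normalized boxes to the original image size and keep confident hits
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(
    outputs, threshold=0.5, target_sizes=target_sizes
)[0]
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(f"{model.config.id2label[label.item()]}: {round(score.item(), 3)} at {box}")
# ---------------------------------------------------------------------------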
@@ -44,7 +44,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DeiTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "DeiTFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "DeiTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/deit-base-distilled-patch16-224"
@@ -433,8 +433,8 @@ DEIT_START_DOCSTRING = r"""
DEIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`DeiTFeatureExtractor`]. See
-[`DeiTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`DeiTImageProcessor`]. See
+[`DeiTImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -611,7 +611,7 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, DeiTForMaskedImageModeling
+>>> from transformers import DeiTImageProcessor, DeiTForMaskedImageModeling
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -619,11 +619,11 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
->>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
+>>> pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
@@ -721,7 +721,7 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, DeiTForImageClassification
+>>> from transformers import DeiTImageProcessor, DeiTForImageClassification
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -732,10 +732,10 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
>>> # note: we are loading a DeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -52,7 +52,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DeiTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "DeiTFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "DeiTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/deit-base-distilled-patch16-224"
@@ -614,8 +614,8 @@ DEIT_START_DOCSTRING = r"""
DEIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`DeiTFeatureExtractor`]. See
-[`DeiTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`DeiTImageProcessor`]. See
+[`DeiTImageProcessor.__call__`] for details.
head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -786,7 +786,7 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, TFDeiTForMaskedImageModeling
+>>> from transformers import DeiTImageProcessor, TFDeiTForMaskedImageModeling
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -794,11 +794,11 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
->>> pixel_values = feature_extractor(images=image, return_tensors="tf").pixel_values
+>>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = tf.cast(tf.random.uniform((1, num_patches), minval=0, maxval=2, dtype=tf.int32), tf.bool)
@@ -917,7 +917,7 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, TFDeiTForImageClassification
+>>> from transformers import DeiTImageProcessor, TFDeiTForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -928,10 +928,10 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
>>> # note: we are loading a TFDeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
@@ -148,7 +148,7 @@ class DetrObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized box coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~DetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -211,13 +211,13 @@ class DetrSegmentationOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized box coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~DetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
pred_masks (`torch.FloatTensor` of shape `(batch_size, num_queries, height/4, width/4)`):
Segmentation masks logits for all queries. See also
-[`~DetrFeatureExtractor.post_process_semantic_segmentation`] or
-[`~DetrFeatureExtractor.post_process_instance_segmentation`]
-[`~DetrFeatureExtractor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
+[`~DetrImageProcessor.post_process_semantic_segmentation`],
+[`~DetrImageProcessor.post_process_instance_segmentation`] or
+[`~DetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
segmentation masks respectively.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -856,8 +856,7 @@ DETR_INPUTS_DOCSTRING = r"""
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it.
-Pixel values can be obtained using [`DetrFeatureExtractor`]. See [`DetrFeatureExtractor.__call__`] for
-details.
+Pixel values can be obtained using [`DetrImageProcessor`]. See [`DetrImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -1243,18 +1242,18 @@ class DetrModel(DetrPreTrainedModel):
Examples:
```python
->>> from transformers import DetrFeatureExtractor, DetrModel
+>>> from transformers import DetrImageProcessor, DetrModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrModel.from_pretrained("facebook/detr-resnet-50")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
@@ -1410,7 +1409,7 @@ class DetrForObjectDetection(DetrPreTrainedModel):
Examples:
```python
->>> from transformers import DetrFeatureExtractor, DetrForObjectDetection
+>>> from transformers import DetrImageProcessor, DetrForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -1418,17 +1417,17 @@ class DetrForObjectDetection(DetrPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-... outputs, threshold=0.9, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
+... 0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
... box = [round(i, 2) for i in box.tolist()]
@@ -1588,24 +1587,24 @@ class DetrForSegmentation(DetrPreTrainedModel):
>>> import torch
>>> import numpy
->>> from transformers import DetrFeatureExtractor, DetrForSegmentation
+>>> from transformers import DetrImageProcessor, DetrForSegmentation
>>> from transformers.image_transforms import rgb_to_id
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
->>> # Use the `post_process_panoptic_segmentation` method of `DetrFeatureExtractor` to retrieve post-processed panoptic segmentation maps
+>>> # Use the `post_process_panoptic_segmentation` method of `DetrImageProcessor` to retrieve post-processed panoptic segmentation maps
>>> # Segmentation results are returned as a list of dictionaries
->>> result = feature_extractor.post_process_panoptic_segmentation(outputs, target_sizes=[(300, 500)])
+>>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[(300, 500)])
>>> # A tensor of shape (height, width) where each value denotes a segment id, filled with -1 if no segment is found
>>> panoptic_seg = result[0]["segmentation"]
@@ -52,7 +52,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DPTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "DPTFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "DPTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "Intel/dpt-large"
@@ -651,8 +651,8 @@ DPT_START_DOCSTRING = r"""
DPT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`ViTFeatureExtractor`]. See
-[`ViTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`ViTImageProcessor`]. See [`ViTImageProcessor.__call__`]
+for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -890,7 +890,7 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
Examples:
```python
->>> from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+>>> from transformers import DPTImageProcessor, DPTForDepthEstimation
>>> import torch
>>> import numpy as np
>>> from PIL import Image
@@ -899,11 +899,11 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+>>> image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
>>> model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
@@ -1052,17 +1052,17 @@ class DPTForSemanticSegmentation(DPTPreTrainedModel):
Examples:
```python
->>> from transformers import DPTFeatureExtractor, DPTForSemanticSegmentation
+>>> from transformers import DPTImageProcessor, DPTForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large-ade")
+>>> image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
>>> model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
@@ -41,7 +41,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "GLPNConfig"
_FEAT_EXTRACTOR_FOR_DOC = "GLPNFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "GLPNImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "vinvino02/glpn-kitti"
@@ -464,7 +464,7 @@ GLPN_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-[`GLPNFeatureExtractor`]. See [`GLPNFeatureExtractor.__call__`] for details.
+[`GLPNImageProcessor`]. See [`GLPNImageProcessor.__call__`] for details.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
@@ -713,7 +713,7 @@ class GLPNForDepthEstimation(GLPNPreTrainedModel):
Examples:
```python
->>> from transformers import GLPNFeatureExtractor, GLPNForDepthEstimation
+>>> from transformers import GLPNImageProcessor, GLPNForDepthEstimation
>>> import torch
>>> import numpy as np
>>> from PIL import Image
@@ -722,11 +722,11 @@ class GLPNForDepthEstimation(GLPNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = GLPNFeatureExtractor.from_pretrained("vinvino02/glpn-kitti")
+>>> image_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-kitti")
>>> model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-kitti")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
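# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the commit): both depth-estimation examples
# (DPT above and GLPN here) stop right after the forward pass in this diff.
# A common next step is to upsample the predicted depth map back to the input
# resolution; `outputs` and `image` continue the example above:
import torch

prediction = torch.nn.functional.interpolate(
    outputs.predicted_depth.unsqueeze(1),  # (batch, h, w) -> (batch, 1, h, w)
    size=image.size[::-1],  # PIL size is (width, height)
    mode="bicubic",
    align_corners=False,
)
depth_map = prediction.squeeze().cpu().numpy()
# ---------------------------------------------------------------------------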
@@ -556,7 +556,7 @@ IMAGEGPT_INPUTS_DOCSTRING = r"""
If `past_key_values` is used, only `input_ids` that do not have their past calculated should be passed as
`input_ids`.
-Indices can be obtained using [`ImageGPTFeatureExtractor`]. See [`ImageGPTFeatureExtractor.__call__`] for
+Indices can be obtained using [`ImageGPTImageProcessor`]. See [`ImageGPTImageProcessor.__call__`] for
details.
past_key_values (`Tuple[Tuple[torch.Tensor]]` of length `config.n_layers`):
@@ -679,17 +679,17 @@ class ImageGPTModel(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTModel
+>>> from transformers import ImageGPTImageProcessor, ImageGPTModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTModel.from_pretrained("openai/imagegpt-small")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -973,12 +973,12 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTForCausalImageModeling
+>>> from transformers import ImageGPTImageProcessor, ImageGPTForCausalImageModeling
>>> import torch
>>> import matplotlib.pyplot as plt
>>> import numpy as np
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForCausalImageModeling.from_pretrained("openai/imagegpt-small")
>>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
>>> model.to(device)
@@ -991,9 +991,9 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
... input_ids=context, max_length=model.config.n_positions + 1, temperature=1.0, do_sample=True, top_k=40
... )
->>> clusters = feature_extractor.clusters
->>> height = feature_extractor.size["height"]
->>> width = feature_extractor.size["width"]
+>>> clusters = image_processor.clusters
+>>> height = image_processor.size["height"]
+>>> width = image_processor.size["width"]
>>> samples = output[:, 1:].cpu().detach().numpy()
>>> samples_img = [
@@ -1124,17 +1124,17 @@ class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTForImageClassification
+>>> from transformers import ImageGPTImageProcessor, ImageGPTForImageClassification
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForImageClassification.from_pretrained("openai/imagegpt-small")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
```"""
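The ImageGPT generation example above is truncated at `samples_img = [`. Since the model generates color-cluster ids rather than pixels, a plausible decoding back to RGB arrays, continuing the names defined in that example and assuming the clusters are stored as RGB triplets in `[-1, 1]`, looks like:

```python
import numpy as np

# samples: (batch, height * width) cluster ids; clusters: (n_clusters, 3) in [-1, 1]
samples_img = [
    np.reshape(np.rint(127.5 * (clusters[s] + 1.0)), [height, width, 3]).astype(np.uint8)
    for s in samples
]
```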
@@ -38,7 +38,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "LevitConfig"
_FEAT_EXTRACTOR_FOR_DOC = "LevitFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "LevitImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/levit-128S"
@@ -523,8 +523,8 @@ LEVIT_START_DOCSTRING = r"""
LEVIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
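The LeViT hunk touches only the inputs docstring; no usage example appears in this diff. For orientation, a minimal classification sketch against the checkpoint named above (illustrative, not part of the commit):

```python
import torch
import requests
from PIL import Image

from transformers import AutoImageProcessor, LevitForImageClassification

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

image_processor = AutoImageProcessor.from_pretrained("facebook/levit-128S")
model = LevitForImageClassification.from_pretrained("facebook/levit-128S")

inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])
```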
@@ -51,7 +51,7 @@ logger = logging.get_logger(__name__)
_CONFIG_FOR_DOC = "MaskFormerConfig"
_CHECKPOINT_FOR_DOC = "facebook/maskformer-swin-base-ade"
_FEAT_EXTRACTOR_FOR_DOC = "MaskFormerFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "MaskFormerImageProcessor"
MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"facebook/maskformer-swin-base-ade",
@@ -192,10 +192,10 @@ class MaskFormerForInstanceSegmentationOutput(ModelOutput):
"""
Class for outputs of [`MaskFormerForInstanceSegmentation`].
-This output can be directly passed to [`~MaskFormerFeatureExtractor.post_process_semantic_segmentation`] or or
-[`~MaskFormerFeatureExtractor.post_process_instance_segmentation`] or
-[`~MaskFormerFeatureExtractor.post_process_panoptic_segmentation`] depending on the task. Please, see
-[`~MaskFormerFeatureExtractor] for details regarding usage.
+This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`],
+[`~MaskFormerImageProcessor.post_process_instance_segmentation`] or
+[`~MaskFormerImageProcessor.post_process_panoptic_segmentation`] depending on the task. Please see
+[`~MaskFormerImageProcessor`] for details regarding usage.
Args:
loss (`torch.Tensor`, *optional*):
@@ -1462,8 +1462,8 @@ MASKFORMER_START_DOCSTRING = r"""
MASKFORMER_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -1562,18 +1562,18 @@ class MaskFormerModel(MaskFormerPreTrainedModel):
Examples:
```python
->>> from transformers import MaskFormerFeatureExtractor, MaskFormerModel
+>>> from transformers import MaskFormerImageProcessor, MaskFormerModel
>>> from PIL import Image
>>> import requests
>>> # load MaskFormer fine-tuned on ADE20k semantic segmentation
->>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-ade")
+>>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
>>> model = MaskFormerModel.from_pretrained("facebook/maskformer-swin-base-ade")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = feature_extractor(image, return_tensors="pt")
+>>> inputs = image_processor(image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
@@ -1741,19 +1741,19 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
Semantic segmentation example:
```python
->>> from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation
+>>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
>>> from PIL import Image
>>> import requests
>>> # load MaskFormer fine-tuned on ADE20k semantic segmentation
->>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-ade")
+>>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
>>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade")
>>> url = (
... "https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg"
... )
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # model predicts class_queries_logits of shape `(batch_size, num_queries)`
@@ -1761,8 +1761,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits
->>> # you can pass them to feature_extractor for postprocessing
->>> predicted_semantic_map = feature_extractor.post_process_semantic_segmentation(
+>>> # you can pass them to image_processor for postprocessing
+>>> predicted_semantic_map = image_processor.post_process_semantic_segmentation(
... outputs, target_sizes=[image.size[::-1]]
... )[0]
@@ -1774,17 +1774,17 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
Panoptic segmentation example:
```python
->>> from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation
+>>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
>>> from PIL import Image
>>> import requests
>>> # load MaskFormer fine-tuned on COCO panoptic segmentation
->>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-coco")
+>>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco")
>>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # model predicts class_queries_logits of shape `(batch_size, num_queries)`
@@ -1792,8 +1792,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits
->>> # you can pass them to feature_extractor for postprocessing
->>> result = feature_extractor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
+>>> # you can pass them to image_processor for postprocessing
+>>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
>>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
>>> predicted_panoptic_map = result["segmentation"]
@@ -33,7 +33,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "MobileNetV1Config"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV1FeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV1ImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "google/mobilenet_v1_1.0_224"
@@ -285,8 +285,8 @@ MOBILENET_V1_START_DOCSTRING = r"""
MOBILENET_V1_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`MobileNetV1FeatureExtractor`]. See
-[`MobileNetV1FeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`MobileNetV1ImageProcessor`]. See
+[`MobileNetV1ImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "MobileNetV2Config"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV2FeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV2ImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "google/mobilenet_v2_1.0_224"
@@ -486,8 +486,8 @@ MOBILENET_V2_START_DOCSTRING = r"""
MOBILENET_V2_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`MobileNetV2FeatureExtractor`]. See
-[`MobileNetV2FeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`MobileNetV2ImageProcessor`]. See
+[`MobileNetV2ImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
@@ -811,17 +811,17 @@ class MobileNetV2ForSemanticSegmentation(MobileNetV2PreTrainedModel):
Examples:
```python
->>> from transformers import MobileNetV2FeatureExtractor, MobileNetV2ForSemanticSegmentation
+>>> from transformers import MobileNetV2ImageProcessor, MobileNetV2ForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = MobileNetV2FeatureExtractor.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513")
+>>> image_processor = MobileNetV2ImageProcessor.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513")
>>> model = MobileNetV2ForSemanticSegmentation.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
@@ -49,7 +49,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "MobileViTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
@@ -692,8 +692,8 @@ MOBILEVIT_START_DOCSTRING = r"""
MOBILEVIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`MobileViTFeatureExtractor`]. See
-[`MobileViTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`MobileViTImageProcessor`]. See
+[`MobileViTImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
@@ -1027,17 +1027,17 @@ class MobileViTForSemanticSegmentation(MobileViTPreTrainedModel):
Examples:
```python
->>> from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation
+>>> from transformers import MobileViTImageProcessor, MobileViTForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
+>>> image_processor = MobileViTImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
... outputs = model(**inputs)
@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "MobileViTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTFeatureExtractor"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
@@ -811,8 +811,8 @@ MOBILEVIT_START_DOCSTRING = r"""
MOBILEVIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`MobileViTFeatureExtractor`]. See
-[`MobileViTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`MobileViTImageProcessor`]. See
+[`MobileViTImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -1103,17 +1103,17 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
Examples:
```python
->>> from transformers import MobileViTFeatureExtractor, TFMobileViTForSemanticSegmentation
+>>> from transformers import MobileViTImageProcessor, TFMobileViTForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small")
+>>> image_processor = MobileViTImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
@@ -768,7 +768,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
Examples:
```python
->>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel
+>>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverImageProcessor, PerceiverModel
>>> from transformers.models.perceiver.modeling_perceiver import (
... PerceiverTextPreprocessor,
... PerceiverImagePreprocessor,
@@ -839,10 +839,10 @@ class PerceiverModel(PerceiverPreTrainedModel):
... )
>>> # you can then do a forward pass as follows:
->>> feature_extractor = PerceiverFeatureExtractor()
+>>> image_processor = PerceiverImageProcessor()
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> inputs = feature_extractor(image, return_tensors="pt").pixel_values
+>>> inputs = image_processor(image, return_tensors="pt").pixel_values
>>> with torch.no_grad():
... outputs = model(inputs=inputs)
@@ -1266,17 +1266,17 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
Examples:
```python
->>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationLearned
+>>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationLearned
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-learned")
+>>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-learned")
>>> model = PerceiverForImageClassificationLearned.from_pretrained("deepmind/vision-perceiver-learned")
->>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
+>>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs)
>>> logits = outputs.logits
>>> list(logits.shape)
@@ -1407,17 +1407,17 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel):
Examples:
```python
->>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationFourier
+>>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationFourier
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-fourier")
+>>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-fourier")
>>> model = PerceiverForImageClassificationFourier.from_pretrained("deepmind/vision-perceiver-fourier")
->>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
+>>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs)
>>> logits = outputs.logits
>>> list(logits.shape)
@@ -1548,17 +1548,17 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel):
Examples:
```python
->>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationConvProcessing
+>>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationConvProcessing
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-conv")
+>>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-conv")
>>> model = PerceiverForImageClassificationConvProcessing.from_pretrained("deepmind/vision-perceiver-conv")
->>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values
+>>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs)
>>> logits = outputs.logits
>>> list(logits.shape)