Unverified Commit 17a7b49b authored by amyeroberts, committed by GitHub

Update doc examples feature extractor -> image processor (#20501)

* Update doc example feature extractor -> image processor

* Apply suggestions from code review
parent afad0c18
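The change is mechanical: every vision docstring example that previously built a `*FeatureExtractor` now builds the equivalent `*ImageProcessor` (or `AutoImageProcessor`), and the `feature_extractor` variable becomes `image_processor`; the call signatures are unchanged. A minimal sketch of the pattern the updated examples converge on, reusing the ConvNeXT checkpoint and COCO test image that appear in the hunks below:

```python
from PIL import Image
import requests

from transformers import AutoImageProcessor, ConvNextModel

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# Previously: feature_extractor = ConvNextFeatureExtractor.from_pretrained(...)
image_processor = AutoImageProcessor.from_pretrained("facebook/convnext-tiny-224")
model = ConvNextModel.from_pretrained("facebook/convnext-tiny-224")

inputs = image_processor(images=image, return_tensors="pt")  # same call as before, new name
outputs = model(**inputs)
last_hidden_state = outputs.last_hidden_state
```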
@@ -37,7 +37,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "ConvNextConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "ConvNextFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "ConvNextImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/convnext-tiny-224"
@@ -308,8 +308,8 @@ CONVNEXT_START_DOCSTRING = r"""
CONVNEXT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
...
@@ -432,8 +432,8 @@ CONVNEXT_START_DOCSTRING = r"""
CONVNEXT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]` ``Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`ConvNextFeatureExtractor`]. See
-[`ConvNextFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`ConvNextImageProcessor`]. See
+[`ConvNextImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -470,17 +470,17 @@ class TFConvNextModel(TFConvNextPreTrainedModel):
Examples:
```python
->>> from transformers import ConvNextFeatureExtractor, TFConvNextModel
+>>> from transformers import ConvNextImageProcessor, TFConvNextModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
+>>> image_processor = ConvNextImageProcessor.from_pretrained("facebook/convnext-tiny-224")
>>> model = TFConvNextModel.from_pretrained("facebook/convnext-tiny-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -561,7 +561,7 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
Examples:
```python
->>> from transformers import ConvNextFeatureExtractor, TFConvNextForImageClassification
+>>> from transformers import ConvNextImageProcessor, TFConvNextForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -569,10 +569,10 @@ class TFConvNextForImageClassification(TFConvNextPreTrainedModel, TFSequenceClas
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
+>>> image_processor = ConvNextImageProcessor.from_pretrained("facebook/convnext-tiny-224")
>>> model = TFConvNextForImageClassification.from_pretrained("facebook/convnext-tiny-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
...
@@ -35,7 +35,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "CvtConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "AutoImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "microsoft/cvt-13"
@@ -573,8 +573,8 @@ CVT_START_DOCSTRING = r"""
CVT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`CvtFeatureExtractor`]. See
-[`CvtFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`CvtImageProcessor`]. See [`CvtImageProcessor.__call__`]
+for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
...
@@ -766,8 +766,8 @@ TFCVT_START_DOCSTRING = r"""
TFCVT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]` ``Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See
-[`AutoFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
+[`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
@@ -808,17 +808,17 @@ class TFCvtModel(TFCvtPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFCvtModel
+>>> from transformers import AutoImageProcessor, TFCvtModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/cvt-13")
+>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/cvt-13")
>>> model = TFCvtModel.from_pretrained("microsoft/cvt-13")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -897,7 +897,7 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFCvtForImageClassification
+>>> from transformers import AutoImageProcessor, TFCvtForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -905,10 +905,10 @@ class TFCvtForImageClassification(TFCvtPreTrainedModel, TFSequenceClassification
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("microsoft/cvt-13")
+>>> image_processor = AutoImageProcessor.from_pretrained("microsoft/cvt-13")
>>> model = TFCvtForImageClassification.from_pretrained("microsoft/cvt-13")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
...
@@ -48,7 +48,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "Data2VecVisionConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "BeitFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "BeitImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/data2vec-vision-base"
@@ -606,8 +606,8 @@ DATA2VEC_VISION_START_DOCSTRING = r"""
DATA2VEC_VISION_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`BeitFeatureExtractor`]. See
-[`BeitFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`BeitImageProcessor`]. See
+[`BeitImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -1146,17 +1146,17 @@ class Data2VecVisionForSemanticSegmentation(Data2VecVisionPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, Data2VecVisionForSemanticSegmentation
+>>> from transformers import AutoImageProcessor, Data2VecVisionForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/data2vec-vision-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/data2vec-vision-base")
>>> model = Data2VecVisionForSemanticSegmentation.from_pretrained("facebook/data2vec-vision-base")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
...
@@ -53,7 +53,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "Data2VecVisionConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "BeitFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "BeitImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/data2vec-vision-base"
@@ -849,8 +849,8 @@ DATA2VEC_VISION_START_DOCSTRING = r"""
DATA2VEC_VISION_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]` ``Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`BeitFeatureExtractor`]. See
-[`BeitFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`BeitImageProcessor`]. See
+[`BeitImageProcessor.__call__`] for details.
head_mask (`np.ndarray` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -1397,17 +1397,17 @@ class TFData2VecVisionForSemanticSegmentation(TFData2VecVisionPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, TFData2VecVisionForSemanticSegmentation
+>>> from transformers import AutoImageProcessor, TFData2VecVisionForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/data2vec-vision-base")
+>>> image_processor = AutoImageProcessor.from_pretrained("facebook/data2vec-vision-base")
>>> model = TFData2VecVisionForSemanticSegmentation.from_pretrained("facebook/data2vec-vision-base")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # logits are of shape (batch_size, num_labels, height, width)
>>> logits = outputs.logits
...
@@ -241,7 +241,7 @@ class DeformableDetrObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~AutoFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~AutoImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -1073,8 +1073,7 @@ DEFORMABLE_DETR_INPUTS_DOCSTRING = r"""
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it.
-Pixel values can be obtained using [`AutoFeatureExtractor`]. See [`AutoFeatureExtractor.__call__`] for
-details.
+Pixel values can be obtained using [`AutoImageProcessor`]. See [`AutoImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -1603,17 +1602,17 @@ class DeformableDetrModel(DeformableDetrPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, DeformableDetrModel
+>>> from transformers import AutoImageProcessor, DeformableDetrModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("SenseTime/deformable-detr")
+>>> image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
>>> model = DeformableDetrModel.from_pretrained("SenseTime/deformable-detr")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
@@ -1873,24 +1872,24 @@ class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
Examples:
```python
->>> from transformers import AutoFeatureExtractor, DeformableDetrForObjectDetection
+>>> from transformers import AutoImageProcessor, DeformableDetrForObjectDetection
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = AutoFeatureExtractor.from_pretrained("SenseTime/deformable-detr")
+>>> image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
>>> model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-...     outputs, threshold=0.5, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[
+...     0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
...     print(
...
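The deformable DETR detection example is cut off at the `print(` call in this view. As a rough, self-contained sketch of how the post-processed results are typically consumed (the `model.config.id2label` lookup and the print format are illustrative additions, not part of the hunk above):

```python
import requests
import torch
from PIL import Image

from transformers import AutoImageProcessor, DeformableDetrForObjectDetection

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

image_processor = AutoImageProcessor.from_pretrained("SenseTime/deformable-detr")
model = DeformableDetrForObjectDetection.from_pretrained("SenseTime/deformable-detr")

inputs = image_processor(images=image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# post_process_object_detection rescales the normalized boxes to the original image size
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(f"{model.config.id2label[label.item()]}: {round(score.item(), 3)} at {box}")
```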
@@ -44,7 +44,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DeiTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "DeiTFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "DeiTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/deit-base-distilled-patch16-224"
@@ -433,8 +433,8 @@ DEIT_START_DOCSTRING = r"""
DEIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`DeiTFeatureExtractor`]. See
-[`DeiTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`DeiTImageProcessor`]. See
+[`DeiTImageProcessor.__call__`] for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -611,7 +611,7 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, DeiTForMaskedImageModeling
+>>> from transformers import DeiTImageProcessor, DeiTForMaskedImageModeling
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -619,11 +619,11 @@ class DeiTForMaskedImageModeling(DeiTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
->>> pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values
+>>> pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()
@@ -721,7 +721,7 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, DeiTForImageClassification
+>>> from transformers import DeiTImageProcessor, DeiTForImageClassification
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -732,10 +732,10 @@ class DeiTForImageClassification(DeiTPreTrainedModel):
>>> # note: we are loading a DeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
...
@@ -52,7 +52,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DeiTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "DeiTFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "DeiTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "facebook/deit-base-distilled-patch16-224"
@@ -614,8 +614,8 @@ DEIT_START_DOCSTRING = r"""
DEIT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`DeiTFeatureExtractor`]. See
-[`DeiTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`DeiTImageProcessor`]. See
+[`DeiTImageProcessor.__call__`] for details.
head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -786,7 +786,7 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, TFDeiTForMaskedImageModeling
+>>> from transformers import DeiTImageProcessor, TFDeiTForMaskedImageModeling
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -794,11 +794,11 @@ class TFDeiTForMaskedImageModeling(TFDeiTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
->>> pixel_values = feature_extractor(images=image, return_tensors="tf").pixel_values
+>>> pixel_values = image_processor(images=image, return_tensors="tf").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = tf.cast(tf.random.uniform((1, num_patches), minval=0, maxval=2, dtype=tf.int32), tf.bool)
@@ -917,7 +917,7 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
Examples:
```python
->>> from transformers import DeiTFeatureExtractor, TFDeiTForImageClassification
+>>> from transformers import DeiTImageProcessor, TFDeiTForImageClassification
>>> import tensorflow as tf
>>> from PIL import Image
>>> import requests
@@ -928,10 +928,10 @@ class TFDeiTForImageClassification(TFDeiTPreTrainedModel, TFSequenceClassificati
>>> # note: we are loading a TFDeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
->>> feature_extractor = DeiTFeatureExtractor.from_pretrained("facebook/deit-base-distilled-patch16-224")
+>>> image_processor = DeiTImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = TFDeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")
->>> inputs = feature_extractor(images=image, return_tensors="tf")
+>>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
...
@@ -148,7 +148,7 @@ class DetrObjectDetectionOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~DetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxilary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -211,13 +211,13 @@ class DetrSegmentationOutput(ModelOutput):
pred_boxes (`torch.FloatTensor` of shape `(batch_size, num_queries, 4)`):
Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
-possible padding). You can use [`~DetrFeatureExtractor.post_process_object_detection`] to retrieve the
+possible padding). You can use [`~DetrImageProcessor.post_process_object_detection`] to retrieve the
unnormalized bounding boxes.
pred_masks (`torch.FloatTensor` of shape `(batch_size, num_queries, height/4, width/4)`):
Segmentation masks logits for all queries. See also
-[`~DetrFeatureExtractor.post_process_semantic_segmentation`] or
-[`~DetrFeatureExtractor.post_process_instance_segmentation`]
-[`~DetrFeatureExtractor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
+[`~DetrImageProcessor.post_process_semantic_segmentation`] or
+[`~DetrImageProcessor.post_process_instance_segmentation`]
+[`~DetrImageProcessor.post_process_panoptic_segmentation`] to evaluate semantic, instance and panoptic
segmentation masks respectively.
auxiliary_outputs (`list[Dict]`, *optional*):
Optional, only returned when auxiliary losses are activated (i.e. `config.auxiliary_loss` is set to `True`)
@@ -856,8 +856,7 @@ DETR_INPUTS_DOCSTRING = r"""
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it.
-Pixel values can be obtained using [`DetrFeatureExtractor`]. See [`DetrFeatureExtractor.__call__`] for
-details.
+Pixel values can be obtained using [`DetrImageProcessor`]. See [`DetrImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
@@ -1243,18 +1242,18 @@ class DetrModel(DetrPreTrainedModel):
Examples:
```python
->>> from transformers import DetrFeatureExtractor, DetrModel
+>>> from transformers import DetrImageProcessor, DetrModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrModel.from_pretrained("facebook/detr-resnet-50")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
@@ -1410,7 +1409,7 @@ class DetrForObjectDetection(DetrPreTrainedModel):
Examples:
```python
->>> from transformers import DetrFeatureExtractor, DetrForObjectDetection
+>>> from transformers import DetrImageProcessor, DetrForObjectDetection
>>> import torch
>>> from PIL import Image
>>> import requests
@@ -1418,17 +1417,17 @@ class DetrForObjectDetection(DetrPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
>>> model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> # convert outputs (bounding boxes and class logits) to COCO API
>>> target_sizes = torch.tensor([image.size[::-1]])
->>> results = feature_extractor.post_process_object_detection(
-...     outputs, threshold=0.9, target_sizes=target_sizes
-... )[0]
+>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
+...     0
+... ]
>>> for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
...     box = [round(i, 2) for i in box.tolist()]
@@ -1588,24 +1587,24 @@ class DetrForSegmentation(DetrPreTrainedModel):
>>> import torch
>>> import numpy
->>> from transformers import DetrFeatureExtractor, DetrForSegmentation
+>>> from transformers import DetrImageProcessor, DetrForSegmentation
>>> from transformers.image_transforms import rgb_to_id
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic")
+>>> image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> # forward pass
>>> outputs = model(**inputs)
->>> # Use the `post_process_panoptic_segmentation` method of `DetrFeatureExtractor` to retrieve post-processed panoptic segmentation maps
+>>> # Use the `post_process_panoptic_segmentation` method of `DetrImageProcessor` to retrieve post-processed panoptic segmentation maps
>>> # Segmentation results are returned as a list of dictionaries
->>> result = feature_extractor.post_process_panoptic_segmentation(outputs, target_sizes=[(300, 500)])
+>>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[(300, 500)])
>>> # A tensor of shape (height, width) where each value denotes a segment id, filled with -1 if no segment is found
>>> panoptic_seg = result[0]["segmentation"]
...
@@ -52,7 +52,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "DPTConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "DPTFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "DPTImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "Intel/dpt-large"
@@ -651,8 +651,8 @@ DPT_START_DOCSTRING = r"""
DPT_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-Pixel values. Pixel values can be obtained using [`ViTFeatureExtractor`]. See
-[`ViTFeatureExtractor.__call__`] for details.
+Pixel values. Pixel values can be obtained using [`ViTImageProcessor`]. See [`ViTImageProcessor.__call__`]
+for details.
head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:
@@ -890,7 +890,7 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
Examples:
```python
->>> from transformers import DPTFeatureExtractor, DPTForDepthEstimation
+>>> from transformers import DPTImageProcessor, DPTForDepthEstimation
>>> import torch
>>> import numpy as np
>>> from PIL import Image
@@ -899,11 +899,11 @@ class DPTForDepthEstimation(DPTPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
+>>> image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
>>> model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)
@@ -1052,17 +1052,17 @@ class DPTForSemanticSegmentation(DPTPreTrainedModel):
Examples:
```python
->>> from transformers import DPTFeatureExtractor, DPTForSemanticSegmentation
+>>> from transformers import DPTImageProcessor, DPTForSemanticSegmentation
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large-ade")
+>>> image_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large-ade")
>>> model = DPTForSemanticSegmentation.from_pretrained("Intel/dpt-large-ade")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
...
@@ -41,7 +41,7 @@ logger = logging.get_logger(__name__)
# General docstring
_CONFIG_FOR_DOC = "GLPNConfig"
-_FEAT_EXTRACTOR_FOR_DOC = "GLPNFeatureExtractor"
+_FEAT_EXTRACTOR_FOR_DOC = "GLPNImageProcessor"
# Base docstring
_CHECKPOINT_FOR_DOC = "vinvino02/glpn-kitti"
@@ -464,7 +464,7 @@ GLPN_INPUTS_DOCSTRING = r"""
Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
-[`GLPNFeatureExtractor`]. See [`GLPNFeatureExtractor.__call__`] for details.
+[`GLPNImageProcessor`]. See [`GLPNImageProcessor.__call__`] for details.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
@@ -713,7 +713,7 @@ class GLPNForDepthEstimation(GLPNPreTrainedModel):
Examples:
```python
->>> from transformers import GLPNFeatureExtractor, GLPNForDepthEstimation
+>>> from transformers import GLPNImageProcessor, GLPNForDepthEstimation
>>> import torch
>>> import numpy as np
>>> from PIL import Image
@@ -722,11 +722,11 @@ class GLPNForDepthEstimation(GLPNPreTrainedModel):
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = GLPNFeatureExtractor.from_pretrained("vinvino02/glpn-kitti")
+>>> image_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-kitti")
>>> model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-kitti")
>>> # prepare image for the model
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad():
...     outputs = model(**inputs)
...
@@ -556,7 +556,7 @@ IMAGEGPT_INPUTS_DOCSTRING = r"""
If `past_key_values` is used, only `input_ids` that do not have their past calculated should be passed as
`input_ids`.
-Indices can be obtained using [`ImageGPTFeatureExtractor`]. See [`ImageGPTFeatureExtractor.__call__`] for
+Indices can be obtained using [`ImageGPTImageProcessor`]. See [`ImageGPTImageProcessor.__call__`] for
details.
past_key_values (`Tuple[Tuple[torch.Tensor]]` of length `config.n_layers`):
@@ -679,17 +679,17 @@ class ImageGPTModel(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTModel
+>>> from transformers import ImageGPTImageProcessor, ImageGPTModel
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTModel.from_pretrained("openai/imagegpt-small")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> last_hidden_states = outputs.last_hidden_state
```"""
@@ -973,12 +973,12 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTForCausalImageModeling
+>>> from transformers import ImageGPTImageProcessor, ImageGPTForCausalImageModeling
>>> import torch
>>> import matplotlib.pyplot as plt
>>> import numpy as np
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForCausalImageModeling.from_pretrained("openai/imagegpt-small")
>>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
>>> model.to(device)
@@ -991,9 +991,9 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
...     input_ids=context, max_length=model.config.n_positions + 1, temperature=1.0, do_sample=True, top_k=40
... )
->>> clusters = feature_extractor.clusters
->>> height = feature_extractor.size["height"]
->>> width = feature_extractor.size["width"]
+>>> clusters = image_processor.clusters
+>>> height = image_processor.size["height"]
+>>> width = image_processor.size["width"]
>>> samples = output[:, 1:].cpu().detach().numpy()
>>> samples_img = [
@@ -1124,17 +1124,17 @@ class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
Examples:
```python
->>> from transformers import ImageGPTFeatureExtractor, ImageGPTForImageClassification
+>>> from transformers import ImageGPTImageProcessor, ImageGPTForImageClassification
>>> from PIL import Image
>>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
->>> feature_extractor = ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
+>>> image_processor = ImageGPTImageProcessor.from_pretrained("openai/imagegpt-small")
>>> model = ImageGPTForImageClassification.from_pretrained("openai/imagegpt-small")
->>> inputs = feature_extractor(images=image, return_tensors="pt")
+>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
```"""
...
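The ImageGPT sampling example (the `@@ -991` hunk above) is truncated at `samples_img = [` in this view. A hedged sketch of how the generated token ids are usually mapped back to RGB arrays: each token indexes one of the processor's color clusters, whose values lie in [-1, 1], so the rescale factor below is an assumption rather than something shown in the hunk.

```python
import numpy as np

# `clusters`, `height`, `width` and `samples` come from the truncated example above.
# Assumed decoding: look up each token's RGB cluster, rescale [-1, 1] -> [0, 255],
# and reshape the flat token sequence back to an image.
samples_img = [
    np.reshape(np.rint(127.5 * (clusters[s] + 1.0)), [height, width, 3]).astype(np.uint8)
    for s in samples
]
```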
...@@ -38,7 +38,7 @@ logger = logging.get_logger(__name__) ...@@ -38,7 +38,7 @@ logger = logging.get_logger(__name__)
# General docstring # General docstring
_CONFIG_FOR_DOC = "LevitConfig" _CONFIG_FOR_DOC = "LevitConfig"
_FEAT_EXTRACTOR_FOR_DOC = "LevitFeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "LevitImageProcessor"
# Base docstring # Base docstring
_CHECKPOINT_FOR_DOC = "facebook/levit-128S" _CHECKPOINT_FOR_DOC = "facebook/levit-128S"
...@@ -523,8 +523,8 @@ LEVIT_START_DOCSTRING = r""" ...@@ -523,8 +523,8 @@ LEVIT_START_DOCSTRING = r"""
LEVIT_INPUTS_DOCSTRING = r""" LEVIT_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
[`AutoFeatureExtractor.__call__`] for details. [`AutoImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*): output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
......
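For the `pixel_values` and `output_hidden_states` arguments described in the LeViT docstring above, a minimal sketch using `AutoImageProcessor` with the `facebook/levit-128S` checkpoint named in the file (illustrative only, not part of the diff):

```python
from transformers import AutoImageProcessor, LevitModel
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

# AutoImageProcessor resolves to the LeViT image processor for this checkpoint
image_processor = AutoImageProcessor.from_pretrained("facebook/levit-128S")
model = LevitModel.from_pretrained("facebook/levit-128S")

inputs = image_processor(images=image, return_tensors="pt")
outputs = model(**inputs, output_hidden_states=True)  # also return per-stage hidden states
```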
...@@ -51,7 +51,7 @@ logger = logging.get_logger(__name__) ...@@ -51,7 +51,7 @@ logger = logging.get_logger(__name__)
_CONFIG_FOR_DOC = "MaskFormerConfig" _CONFIG_FOR_DOC = "MaskFormerConfig"
_CHECKPOINT_FOR_DOC = "facebook/maskformer-swin-base-ade" _CHECKPOINT_FOR_DOC = "facebook/maskformer-swin-base-ade"
_FEAT_EXTRACTOR_FOR_DOC = "MaskFormerFeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "MaskFormerImageProcessor"
MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [ MASKFORMER_PRETRAINED_MODEL_ARCHIVE_LIST = [
"facebook/maskformer-swin-base-ade", "facebook/maskformer-swin-base-ade",
...@@ -192,10 +192,10 @@ class MaskFormerForInstanceSegmentationOutput(ModelOutput): ...@@ -192,10 +192,10 @@ class MaskFormerForInstanceSegmentationOutput(ModelOutput):
""" """
Class for outputs of [`MaskFormerForInstanceSegmentation`]. Class for outputs of [`MaskFormerForInstanceSegmentation`].
This output can be directly passed to [`~MaskFormerFeatureExtractor.post_process_semantic_segmentation`] or This output can be directly passed to [`~MaskFormerImageProcessor.post_process_semantic_segmentation`] or
[`~MaskFormerFeatureExtractor.post_process_instance_segmentation`] or [`~MaskFormerImageProcessor.post_process_instance_segmentation`] or
[`~MaskFormerFeatureExtractor.post_process_panoptic_segmentation`] depending on the task. Please see [`~MaskFormerImageProcessor.post_process_panoptic_segmentation`] depending on the task. Please see
[`~MaskFormerFeatureExtractor`] for details regarding usage. [`~MaskFormerImageProcessor`] for details regarding usage.
Args: Args:
loss (`torch.Tensor`, *optional*): loss (`torch.Tensor`, *optional*):
...@@ -1462,8 +1462,8 @@ MASKFORMER_START_DOCSTRING = r""" ...@@ -1462,8 +1462,8 @@ MASKFORMER_START_DOCSTRING = r"""
MASKFORMER_INPUTS_DOCSTRING = r""" MASKFORMER_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`AutoFeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
[`AutoFeatureExtractor.__call__`] for details. [`AutoImageProcessor.__call__`] for details.
pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*): pixel_mask (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`: Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:
...@@ -1562,18 +1562,18 @@ class MaskFormerModel(MaskFormerPreTrainedModel): ...@@ -1562,18 +1562,18 @@ class MaskFormerModel(MaskFormerPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import MaskFormerFeatureExtractor, MaskFormerModel >>> from transformers import MaskFormerImageProcessor, MaskFormerModel
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation
>>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-ade") >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
>>> model = MaskFormerModel.from_pretrained("facebook/maskformer-swin-base-ade") >>> model = MaskFormerModel.from_pretrained("facebook/maskformer-swin-base-ade")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(image, return_tensors="pt") >>> inputs = image_processor(image, return_tensors="pt")
>>> # forward pass >>> # forward pass
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
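The `MaskFormerModel` example is truncated by the diff right after the forward pass. As a hedged continuation, the base model's output exposes the decoder hidden states (attribute name per `MaskFormerModelOutput`; treat the exact shape as indicative):

```python
# one embedding per object query from the transformer decoder
decoder_states = outputs.transformer_decoder_last_hidden_state
print(decoder_states.shape)  # roughly (batch_size, num_queries, hidden_size)
```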
...@@ -1741,19 +1741,19 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): ...@@ -1741,19 +1741,19 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
Semantic segmentation example: Semantic segmentation example:
```python ```python
>>> from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> # load MaskFormer fine-tuned on ADE20k semantic segmentation >>> # load MaskFormer fine-tuned on ADE20k semantic segmentation
>>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-ade") >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-ade")
>>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade") >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-ade")
>>> url = ( >>> url = (
... "https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg" ... "https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg"
... ) ... )
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> # model predicts class_queries_logits of shape `(batch_size, num_queries)` >>> # model predicts class_queries_logits of shape `(batch_size, num_queries)`
...@@ -1761,8 +1761,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): ...@@ -1761,8 +1761,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> class_queries_logits = outputs.class_queries_logits >>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits >>> masks_queries_logits = outputs.masks_queries_logits
>>> # you can pass them to feature_extractor for postprocessing >>> # you can pass them to image_processor for postprocessing
>>> predicted_semantic_map = feature_extractor.post_process_semantic_segmentation( >>> predicted_semantic_map = image_processor.post_process_semantic_segmentation(
... outputs, target_sizes=[image.size[::-1]] ... outputs, target_sizes=[image.size[::-1]]
... )[0] ... )[0]
...@@ -1774,17 +1774,17 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): ...@@ -1774,17 +1774,17 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
Panoptic segmentation example: Panoptic segmentation example:
```python ```python
>>> from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation >>> from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> # load MaskFormer fine-tuned on COCO panoptic segmentation >>> # load MaskFormer fine-tuned on COCO panoptic segmentation
>>> feature_extractor = MaskFormerFeatureExtractor.from_pretrained("facebook/maskformer-swin-base-coco") >>> image_processor = MaskFormerImageProcessor.from_pretrained("facebook/maskformer-swin-base-coco")
>>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco") >>> model = MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-swin-base-coco")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
>>> # model predicts class_queries_logits of shape `(batch_size, num_queries)` >>> # model predicts class_queries_logits of shape `(batch_size, num_queries)`
...@@ -1792,8 +1792,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel): ...@@ -1792,8 +1792,8 @@ class MaskFormerForInstanceSegmentation(MaskFormerPreTrainedModel):
>>> class_queries_logits = outputs.class_queries_logits >>> class_queries_logits = outputs.class_queries_logits
>>> masks_queries_logits = outputs.masks_queries_logits >>> masks_queries_logits = outputs.masks_queries_logits
>>> # you can pass them to feature_extractor for postprocessing >>> # you can pass them to image_processor for postprocessing
>>> result = feature_extractor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0] >>> result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[image.size[::-1]])[0]
>>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs) >>> # we refer to the demo notebooks for visualization (see "Resources" section in the MaskFormer docs)
>>> predicted_panoptic_map = result["segmentation"] >>> predicted_panoptic_map = result["segmentation"]
......
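The docstring above also points to [`~MaskFormerImageProcessor.post_process_instance_segmentation`], which neither example in the diff exercises. A minimal sketch of that third path, reusing `outputs` and `image` from the panoptic example and assuming the same calling convention as the semantic and panoptic helpers:

```python
# instance segmentation post-processing (illustrative; check the current signature)
result = image_processor.post_process_instance_segmentation(
    outputs, target_sizes=[image.size[::-1]]
)[0]
instance_map = result["segmentation"]    # (height, width) map of instance ids
segments_info = result["segments_info"]  # per-instance label ids and scores
```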
...@@ -33,7 +33,7 @@ logger = logging.get_logger(__name__) ...@@ -33,7 +33,7 @@ logger = logging.get_logger(__name__)
# General docstring # General docstring
_CONFIG_FOR_DOC = "MobileNetV1Config" _CONFIG_FOR_DOC = "MobileNetV1Config"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV1FeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "MobileNetV1ImageProcessor"
# Base docstring # Base docstring
_CHECKPOINT_FOR_DOC = "google/mobilenet_v1_1.0_224" _CHECKPOINT_FOR_DOC = "google/mobilenet_v1_1.0_224"
...@@ -285,8 +285,8 @@ MOBILENET_V1_START_DOCSTRING = r""" ...@@ -285,8 +285,8 @@ MOBILENET_V1_START_DOCSTRING = r"""
MOBILENET_V1_INPUTS_DOCSTRING = r""" MOBILENET_V1_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`MobileNetV1FeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`MobileNetV1ImageProcessor`]. See
[`MobileNetV1FeatureExtractor.__call__`] for details. [`MobileNetV1ImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*): output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail. more detail.
......
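For the `pixel_values` argument described in the MobileNetV1 docstring above, a minimal classification sketch with the `google/mobilenet_v1_1.0_224` checkpoint named in the file (illustrative only):

```python
from transformers import MobileNetV1ImageProcessor, MobileNetV1ForImageClassification
from PIL import Image
import requests

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

image_processor = MobileNetV1ImageProcessor.from_pretrained("google/mobilenet_v1_1.0_224")
model = MobileNetV1ForImageClassification.from_pretrained("google/mobilenet_v1_1.0_224")

inputs = image_processor(images=image, return_tensors="pt")
logits = model(**inputs).logits
print(model.config.id2label[logits.argmax(-1).item()])  # predicted ImageNet label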
...@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__) ...@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__)
# General docstring # General docstring
_CONFIG_FOR_DOC = "MobileNetV2Config" _CONFIG_FOR_DOC = "MobileNetV2Config"
_FEAT_EXTRACTOR_FOR_DOC = "MobileNetV2FeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "MobileNetV2ImageProcessor"
# Base docstring # Base docstring
_CHECKPOINT_FOR_DOC = "google/mobilenet_v2_1.0_224" _CHECKPOINT_FOR_DOC = "google/mobilenet_v2_1.0_224"
...@@ -486,8 +486,8 @@ MOBILENET_V2_START_DOCSTRING = r""" ...@@ -486,8 +486,8 @@ MOBILENET_V2_START_DOCSTRING = r"""
MOBILENET_V2_INPUTS_DOCSTRING = r""" MOBILENET_V2_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`MobileNetV2FeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`MobileNetV2ImageProcessor`]. See
[`MobileNetV2FeatureExtractor.__call__`] for details. [`MobileNetV2ImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*): output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail. more detail.
...@@ -811,17 +811,17 @@ class MobileNetV2ForSemanticSegmentation(MobileNetV2PreTrainedModel): ...@@ -811,17 +811,17 @@ class MobileNetV2ForSemanticSegmentation(MobileNetV2PreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import MobileNetV2FeatureExtractor, MobileNetV2ForSemanticSegmentation >>> from transformers import MobileNetV2ImageProcessor, MobileNetV2ForSemanticSegmentation
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = MobileNetV2FeatureExtractor.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513") >>> image_processor = MobileNetV2ImageProcessor.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513")
>>> model = MobileNetV2ForSemanticSegmentation.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513") >>> model = MobileNetV2ForSemanticSegmentation.from_pretrained("google/deeplabv3_mobilenet_v2_1.0_513")
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad(): >>> with torch.no_grad():
... outputs = model(**inputs) ... outputs = model(**inputs)
......
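The MobileNetV2 segmentation example stops at the forward pass. A hedged continuation showing one way to turn the low-resolution logits into a per-pixel label map with plain PyTorch (not necessarily how the library's own post-processing does it):

```python
import torch

logits = outputs.logits  # (batch_size, num_labels, h', w'), lower resolution than the input image

# upsample to the original image size; PIL's image.size is (width, height), hence the reversal
upsampled = torch.nn.functional.interpolate(
    logits, size=image.size[::-1], mode="bilinear", align_corners=False
)
segmentation_map = upsampled.argmax(dim=1)[0]  # (height, width) class id per pixel
```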
...@@ -49,7 +49,7 @@ logger = logging.get_logger(__name__) ...@@ -49,7 +49,7 @@ logger = logging.get_logger(__name__)
# General docstring # General docstring
_CONFIG_FOR_DOC = "MobileViTConfig" _CONFIG_FOR_DOC = "MobileViTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTFeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
# Base docstring # Base docstring
_CHECKPOINT_FOR_DOC = "apple/mobilevit-small" _CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
...@@ -692,8 +692,8 @@ MOBILEVIT_START_DOCSTRING = r""" ...@@ -692,8 +692,8 @@ MOBILEVIT_START_DOCSTRING = r"""
MOBILEVIT_INPUTS_DOCSTRING = r""" MOBILEVIT_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`MobileViTFeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`MobileViTImageProcessor`]. See
[`MobileViTFeatureExtractor.__call__`] for details. [`MobileViTImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*): output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail. more detail.
...@@ -1027,17 +1027,17 @@ class MobileViTForSemanticSegmentation(MobileViTPreTrainedModel): ...@@ -1027,17 +1027,17 @@ class MobileViTForSemanticSegmentation(MobileViTPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import MobileViTFeatureExtractor, MobileViTForSemanticSegmentation >>> from transformers import MobileViTImageProcessor, MobileViTForSemanticSegmentation
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small") >>> image_processor = MobileViTImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small") >>> model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> inputs = feature_extractor(images=image, return_tensors="pt") >>> inputs = image_processor(images=image, return_tensors="pt")
>>> with torch.no_grad(): >>> with torch.no_grad():
... outputs = model(**inputs) ... outputs = model(**inputs)
......
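Similarly, the MobileViT example above ends before any post-processing. A minimal sketch using the processor's semantic-segmentation helper, assuming `post_process_semantic_segmentation` is available on `MobileViTImageProcessor` as it was on the feature extractor:

```python
# convert the logits to a per-pixel class map at the original image resolution
predicted_map = image_processor.post_process_semantic_segmentation(
    outputs, target_sizes=[image.size[::-1]]
)[0]
print(predicted_map.shape)  # (height, width)
```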
...@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__) ...@@ -43,7 +43,7 @@ logger = logging.get_logger(__name__)
# General docstring # General docstring
_CONFIG_FOR_DOC = "MobileViTConfig" _CONFIG_FOR_DOC = "MobileViTConfig"
_FEAT_EXTRACTOR_FOR_DOC = "MobileViTFeatureExtractor" _FEAT_EXTRACTOR_FOR_DOC = "MobileViTImageProcessor"
# Base docstring # Base docstring
_CHECKPOINT_FOR_DOC = "apple/mobilevit-small" _CHECKPOINT_FOR_DOC = "apple/mobilevit-small"
...@@ -811,8 +811,8 @@ MOBILEVIT_START_DOCSTRING = r""" ...@@ -811,8 +811,8 @@ MOBILEVIT_START_DOCSTRING = r"""
MOBILEVIT_INPUTS_DOCSTRING = r""" MOBILEVIT_INPUTS_DOCSTRING = r"""
Args: Args:
pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`): pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
Pixel values. Pixel values can be obtained using [`MobileViTFeatureExtractor`]. See Pixel values. Pixel values can be obtained using [`MobileViTImageProcessor`]. See
[`MobileViTFeatureExtractor.__call__`] for details. [`MobileViTImageProcessor.__call__`] for details.
output_hidden_states (`bool`, *optional*): output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
...@@ -1103,17 +1103,17 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel): ...@@ -1103,17 +1103,17 @@ class TFMobileViTForSemanticSegmentation(TFMobileViTPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import MobileViTFeatureExtractor, TFMobileViTForSemanticSegmentation >>> from transformers import MobileViTImageProcessor, TFMobileViTForSemanticSegmentation
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = MobileViTFeatureExtractor.from_pretrained("apple/deeplabv3-mobilevit-small") >>> image_processor = MobileViTImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small") >>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")
>>> inputs = feature_extractor(images=image, return_tensors="tf") >>> inputs = image_processor(images=image, return_tensors="tf")
>>> outputs = model(**inputs) >>> outputs = model(**inputs)
......
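For the TensorFlow variant, a hedged continuation that reads the logits off the output and takes a per-pixel argmax with plain TensorFlow ops; the channels-first layout is assumed to mirror the PyTorch model:

```python
import tensorflow as tf

logits = outputs.logits  # assumed (batch_size, num_labels, h', w'), lower resolution than the input
segmentation_map = tf.argmax(logits, axis=1)[0]  # (h', w') class id per pixel
```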
...@@ -768,7 +768,7 @@ class PerceiverModel(PerceiverPreTrainedModel): ...@@ -768,7 +768,7 @@ class PerceiverModel(PerceiverPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel >>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverImageProcessor, PerceiverModel
>>> from transformers.models.perceiver.modeling_perceiver import ( >>> from transformers.models.perceiver.modeling_perceiver import (
... PerceiverTextPreprocessor, ... PerceiverTextPreprocessor,
... PerceiverImagePreprocessor, ... PerceiverImagePreprocessor,
...@@ -839,10 +839,10 @@ class PerceiverModel(PerceiverPreTrainedModel): ...@@ -839,10 +839,10 @@ class PerceiverModel(PerceiverPreTrainedModel):
... ) ... )
>>> # you can then do a forward pass as follows: >>> # you can then do a forward pass as follows:
>>> feature_extractor = PerceiverFeatureExtractor() >>> image_processor = PerceiverImageProcessor()
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = feature_extractor(image, return_tensors="pt").pixel_values >>> inputs = image_processor(image, return_tensors="pt").pixel_values
>>> with torch.no_grad(): >>> with torch.no_grad():
... outputs = model(inputs=inputs) ... outputs = model(inputs=inputs)
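The `PerceiverModel` example is cut off after the forward pass. As a hedged continuation, when a classification decoder is attached as in that example, the decoded predictions are exposed on the model output:

```python
# with a decoder attached, decoded predictions are available as `outputs.logits`
logits = outputs.logits
print(logits.shape)  # (batch_size, num_labels) for a classification decoder
```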
...@@ -1266,17 +1266,17 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel): ...@@ -1266,17 +1266,17 @@ class PerceiverForImageClassificationLearned(PerceiverPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationLearned >>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationLearned
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-learned") >>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-learned")
>>> model = PerceiverForImageClassificationLearned.from_pretrained("deepmind/vision-perceiver-learned") >>> model = PerceiverForImageClassificationLearned.from_pretrained("deepmind/vision-perceiver-learned")
>>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values >>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs) >>> outputs = model(inputs=inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> list(logits.shape) >>> list(logits.shape)
...@@ -1407,17 +1407,17 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel): ...@@ -1407,17 +1407,17 @@ class PerceiverForImageClassificationFourier(PerceiverPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationFourier >>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationFourier
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-fourier") >>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-fourier")
>>> model = PerceiverForImageClassificationFourier.from_pretrained("deepmind/vision-perceiver-fourier") >>> model = PerceiverForImageClassificationFourier.from_pretrained("deepmind/vision-perceiver-fourier")
>>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values >>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs) >>> outputs = model(inputs=inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> list(logits.shape) >>> list(logits.shape)
...@@ -1548,17 +1548,17 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel): ...@@ -1548,17 +1548,17 @@ class PerceiverForImageClassificationConvProcessing(PerceiverPreTrainedModel):
Examples: Examples:
```python ```python
>>> from transformers import PerceiverFeatureExtractor, PerceiverForImageClassificationConvProcessing >>> from transformers import PerceiverImageProcessor, PerceiverForImageClassificationConvProcessing
>>> from PIL import Image >>> from PIL import Image
>>> import requests >>> import requests
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg" >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw) >>> image = Image.open(requests.get(url, stream=True).raw)
>>> feature_extractor = PerceiverFeatureExtractor.from_pretrained("deepmind/vision-perceiver-conv") >>> image_processor = PerceiverImageProcessor.from_pretrained("deepmind/vision-perceiver-conv")
>>> model = PerceiverForImageClassificationConvProcessing.from_pretrained("deepmind/vision-perceiver-conv") >>> model = PerceiverForImageClassificationConvProcessing.from_pretrained("deepmind/vision-perceiver-conv")
>>> inputs = feature_extractor(images=image, return_tensors="pt").pixel_values >>> inputs = image_processor(images=image, return_tensors="pt").pixel_values
>>> outputs = model(inputs=inputs) >>> outputs = model(inputs=inputs)
>>> logits = outputs.logits >>> logits = outputs.logits
>>> list(logits.shape) >>> list(logits.shape)
......
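All three Perceiver classification examples stop at `list(logits.shape)`. A minimal final step mapping the logits to a human-readable label via the model config (the `deepmind/vision-perceiver-*` checkpoints are ImageNet-tuned, so `id2label` is populated):

```python
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])
```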