Fix `Owlv2` checkpoint name and a default value in `Owlv2VisionConfig` (#27402)

* fix
* fix
* fix
* fix
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

Fix `Owlv2` checkpoint name and a default value in `Owlv2VisionConfig` (#27402)
* fix
* fix
* fix
* fix
* fix
* fix
* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
740cd935 · Yih-Dar · GitHub · 51a98c40 · 740cd935
Unverified Commit 740cd935 authored Nov 09, 2023 by Yih-Dar Committed by GitHub Nov 09, 2023
Show whitespace changes
Inline Side-by-side

Showing with 12 additions and 12 deletions

src/transformers/models/owlv2/configuration_owlv2.py src/transformers/models/owlv2/configuration_owlv2.py +12 -12

No files found.
--- a/src/transformers/models/owlv2/configuration_owlv2.py
+++ b/src/transformers/models/owlv2/configuration_owlv2.py
@@ -32,13 +32,13 @@ OWLV2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
 }


-# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTTextConfig with OwlViT->Owlv2, owlvit-base-patch-16->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
+# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTTextConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
 class Owlv2TextConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`Owlv2TextModel`]. It is used to instantiate an
    Owlv2 text encoder according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the Owlv2
-    [google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
+    [google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.
@@ -83,10 +83,10 @@ class Owlv2TextConfig(PretrainedConfig):
    ```python
    >>> from transformers import Owlv2TextConfig, Owlv2TextModel

-    >>> # Initializing a Owlv2TextModel with google/owlv2-base-patch32 style configuration
+    >>> # Initializing a Owlv2TextModel with google/owlv2-base-patch16 style configuration
    >>> configuration = Owlv2TextConfig()

-    >>> # Initializing a Owlv2TextConfig from the google/owlv2-base-patch32 style configuration
+    >>> # Initializing a Owlv2TextConfig from the google/owlv2-base-patch16 style configuration
    >>> model = Owlv2TextModel(configuration)

    >>> # Accessing the model configuration
@@ -145,13 +145,13 @@ class Owlv2TextConfig(PretrainedConfig):
        return cls.from_dict(config_dict, **kwargs)


-# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch-32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
+# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2, 32->16
 class Owlv2VisionConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`Owlv2VisionModel`]. It is used to instantiate an
    OWLv2 image encoder according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the OWLv2
-    [google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
+    [google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.
@@ -169,7 +169,7 @@ class Owlv2VisionConfig(PretrainedConfig):
            Number of channels in the input images.
        image_size (`int`, *optional*, defaults to 768):
            The size (resolution) of each image.
-        patch_size (`int`, *optional*, defaults to 32):
+        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
@@ -189,10 +189,10 @@ class Owlv2VisionConfig(PretrainedConfig):
    ```python
    >>> from transformers import Owlv2VisionConfig, Owlv2VisionModel

-    >>> # Initializing a Owlv2VisionModel with google/owlv2-base-patch32 style configuration
+    >>> # Initializing a Owlv2VisionModel with google/owlv2-base-patch16 style configuration
    >>> configuration = Owlv2VisionConfig()

-    >>> # Initializing a Owlv2VisionModel model from the google/owlv2-base-patch32 style configuration
+    >>> # Initializing a Owlv2VisionModel model from the google/owlv2-base-patch16 style configuration
    >>> model = Owlv2VisionModel(configuration)

    >>> # Accessing the model configuration
@@ -209,7 +209,7 @@ class Owlv2VisionConfig(PretrainedConfig):
        num_attention_heads=12,
        num_channels=3,
        image_size=768,
-        patch_size=32,
+        patch_size=16,
        hidden_act="quick_gelu",
        layer_norm_eps=1e-5,
        attention_dropout=0.0,
@@ -251,13 +251,13 @@ class Owlv2VisionConfig(PretrainedConfig):
        return cls.from_dict(config_dict, **kwargs)


-# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch-32->owlv2-base-patch32, owlvit->owlv2, OWL-ViT->OWLv2
+# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
 class Owlv2Config(PretrainedConfig):
    r"""
    [`Owlv2Config`] is the configuration class to store the configuration of an [`Owlv2Model`]. It is used to
    instantiate an OWLv2 model according to the specified arguments, defining the text model and vision model configs.
    Instantiating a configuration with the defaults will yield a similar configuration to that of the OWLv2
-    [google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
+    [google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.