Unverified commit 740cd935, authored by Yih-Dar and committed by GitHub
Browse files

Fix `Owlv2` checkpoint name and a default value in `Owlv2VisionConfig` (#27402)



* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 51a98c40
......@@ -32,13 +32,13 @@ OWLV2_PRETRAINED_CONFIG_ARCHIVE_MAP = {
}
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTTextConfig with OwlViT->Owlv2, owlvit-base-patch-16->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTTextConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
class Owlv2TextConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of an [`Owlv2TextModel`]. It is used to instantiate an
Owlv2 text encoder according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the Owlv2
[google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
[google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
......@@ -83,10 +83,10 @@ class Owlv2TextConfig(PretrainedConfig):
```python
>>> from transformers import Owlv2TextConfig, Owlv2TextModel
>>> # Initializing a Owlv2TextModel with google/owlv2-base-patch32 style configuration
>>> # Initializing a Owlv2TextModel with google/owlv2-base-patch16 style configuration
>>> configuration = Owlv2TextConfig()
>>> # Initializing a Owlv2TextConfig from the google/owlv2-base-patch32 style configuration
>>> # Initializing a Owlv2TextConfig from the google/owlv2-base-patch16 style configuration
>>> model = Owlv2TextModel(configuration)
>>> # Accessing the model configuration
......@@ -145,13 +145,13 @@ class Owlv2TextConfig(PretrainedConfig):
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch-32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTVisionConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2, 32->16
class Owlv2VisionConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of an [`Owlv2VisionModel`]. It is used to instantiate an
OWLv2 image encoder according to the specified arguments, defining the model architecture. Instantiating a
configuration with the defaults will yield a similar configuration to that of the OWLv2
[google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
[google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
......@@ -169,7 +169,7 @@ class Owlv2VisionConfig(PretrainedConfig):
Number of channels in the input images.
image_size (`int`, *optional*, defaults to 768):
The size (resolution) of each image.
patch_size (`int`, *optional*, defaults to 32):
patch_size (`int`, *optional*, defaults to 16):
The size (resolution) of each patch.
hidden_act (`str` or `function`, *optional*, defaults to `"quick_gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
......@@ -189,10 +189,10 @@ class Owlv2VisionConfig(PretrainedConfig):
```python
>>> from transformers import Owlv2VisionConfig, Owlv2VisionModel
>>> # Initializing a Owlv2VisionModel with google/owlv2-base-patch32 style configuration
>>> # Initializing a Owlv2VisionModel with google/owlv2-base-patch16 style configuration
>>> configuration = Owlv2VisionConfig()
>>> # Initializing a Owlv2VisionModel model from the google/owlv2-base-patch32 style configuration
>>> # Initializing a Owlv2VisionModel model from the google/owlv2-base-patch16 style configuration
>>> model = Owlv2VisionModel(configuration)
>>> # Accessing the model configuration
......@@ -209,7 +209,7 @@ class Owlv2VisionConfig(PretrainedConfig):
num_attention_heads=12,
num_channels=3,
image_size=768,
patch_size=32,
patch_size=16,
hidden_act="quick_gelu",
layer_norm_eps=1e-5,
attention_dropout=0.0,
......@@ -251,13 +251,13 @@ class Owlv2VisionConfig(PretrainedConfig):
return cls.from_dict(config_dict, **kwargs)
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch-32->owlv2-base-patch32, owlvit->owlv2, OWL-ViT->OWLv2
# Copied from transformers.models.owlvit.configuration_owlvit.OwlViTConfig with OwlViT->Owlv2, owlvit-base-patch32->owlv2-base-patch16, owlvit->owlv2, OWL-ViT->OWLv2
class Owlv2Config(PretrainedConfig):
r"""
[`Owlv2Config`] is the configuration class to store the configuration of an [`Owlv2Model`]. It is used to
instantiate an OWLv2 model according to the specified arguments, defining the text model and vision model configs.
Instantiating a configuration with the defaults will yield a similar configuration to that of the OWLv2
[google/owlv2-base-patch32](https://huggingface.co/google/owlv2-base-patch32) architecture.
[google/owlv2-base-patch16](https://huggingface.co/google/owlv2-base-patch16) architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
......
Markdown is supported
0% — Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment