# coding=utf-8
# Copyright 2021 Facebook AI Research (FAIR) and The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" DeiT model configuration"""

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)

DEIT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "facebook/deit-base-distilled-patch16-224": "https://huggingface.co/facebook/deit-base-patch16-224/resolve/main/config.json",
    # See all DeiT models at https://huggingface.co/models?filter=deit
}
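
# Note: a configuration for any of these checkpoints can also be loaded by model id, e.g.
# `DeiTConfig.from_pretrained("facebook/deit-base-distilled-patch16-224")` (a sketch of
# standard `PretrainedConfig` usage; it downloads the config.json mapped above from the Hub).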


class DeiTConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`DeiTModel`]. It is used to instantiate a DeiT
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with
    the defaults will yield a similar configuration to that of the DeiT
    [facebook/deit-base-distilled-patch16-224](https://huggingface.co/facebook/deit-base-distilled-patch16-224)
    architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 768):
            Dimensionality of the encoder layers and the pooler layer.
        num_hidden_layers (`int`, *optional*, defaults to 12):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer encoder.
        intermediate_size (`int`, *optional*, defaults to 3072):
            Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
            `"relu"`, `"selu"` and `"gelu_new"` are supported.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
        attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-12):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 224):
            The size (resolution) of each image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        qkv_bias (`bool`, *optional*, defaults to `True`):
            Whether to add a bias to the queries, keys and values.

    Example:

    ```python
    >>> from transformers import DeiTModel, DeiTConfig

    >>> # Initializing a DeiT deit-base-distilled-patch16-224 style configuration
    >>> configuration = DeiTConfig()

    >>> # Initializing a model from the deit-base-distilled-patch16-224 style configuration
    >>> model = DeiTModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
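
    >>> # Overriding defaults yields a custom architecture (illustrative values here,
    >>> # roughly DeiT-small-sized; not tied to a released checkpoint)
    >>> small_configuration = DeiTConfig(hidden_size=384, num_hidden_layers=12, num_attention_heads=6)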
    ```"""
NielsRogge's avatar
NielsRogge committed
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
    model_type = "deit"

    def __init__(
        self,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.0,
        attention_probs_dropout_prob=0.0,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        is_encoder_decoder=False,
        image_size=224,
        patch_size=16,
        num_channels=3,
        qkv_bias=True,
        **kwargs
    ):
        super().__init__(**kwargs)

        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps

        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.qkv_bias = qkv_bias
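

# A minimal round-trip sketch (assumes `transformers` is installed and this module is imported
# as part of the package, since it uses relative imports; values are illustrative):
#
#     from transformers import DeiTConfig
#
#     config = DeiTConfig(image_size=384)
#     config.save_pretrained("./deit-custom")  # writes ./deit-custom/config.json
#     restored = DeiTConfig.from_pretrained("./deit-custom")
#     assert restored.image_size == 384
#
# `save_pretrained` / `from_pretrained` are inherited from `PretrainedConfig`, so the class
# body above only needs to record the architecture hyperparameters.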