chenpangpang / transformers

Unverified commit 4430b912
Authored Dec 05, 2022 by Yih-Dar; committed via GitHub on Dec 05, 2022
clean up unused `classifier_dropout` in config (#20596)
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
Parent: eefae413
Changes: showing 6 changed files with 0 additions and 24 deletions (+0 -24).
    src/transformers/models/blenderbot/configuration_blenderbot.py  (+0 -4)
    src/transformers/models/blenderbot_small/configuration_blenderbot_small.py  (+0 -4)
    src/transformers/models/chinese_clip/configuration_chinese_clip.py  (+0 -4)
    src/transformers/models/marian/configuration_marian.py  (+0 -4)
    src/transformers/models/pegasus/configuration_pegasus.py  (+0 -4)
    src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py  (+0 -4)
src/transformers/models/blenderbot/configuration_blenderbot.py

@@ -71,8 +71,6 @@ class BlenderbotConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 128):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ class BlenderbotConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -156,7 +153,6 @@ class BlenderbotConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
src/transformers/models/blenderbot_small/configuration_blenderbot_small.py

@@ -71,8 +71,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 512):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -131,7 +129,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=1,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         bos_token_id=1,
@@ -155,7 +152,6 @@ class BlenderbotSmallConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
src/transformers/models/chinese_clip/configuration_chinese_clip.py

@@ -87,8 +87,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         use_cache (`bool`, *optional*, defaults to `True`):
             Whether or not the model should return the last key/values attentions (not used by all models). Only
             relevant if `config.is_decoder=True`.
-        classifier_dropout (`float`, *optional*):
-            The dropout ratio for the classification head.
 
     Example:
 
@@ -124,7 +122,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         pad_token_id=0,
         position_embedding_type="absolute",
         use_cache=True,
-        classifier_dropout=None,
         **kwargs
     ):
         super().__init__(pad_token_id=pad_token_id, **kwargs)
@@ -144,7 +141,6 @@ class ChineseCLIPTextConfig(PretrainedConfig):
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
         self.use_cache = use_cache
-        self.classifier_dropout = classifier_dropout
 
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
src/transformers/models/marian/configuration_marian.py

@@ -69,8 +69,6 @@ class MarianConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -130,7 +128,6 @@ class MarianConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=58100,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=58100,
         eos_token_id=0,
@@ -155,7 +152,6 @@ class MarianConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
src/transformers/models/pegasus/configuration_pegasus.py

@@ -64,8 +64,6 @@ class PegasusConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         max_position_embeddings (`int`, *optional*, defaults to 1024):
             The maximum sequence length that this model might ever be used with. Typically set this to something large
             just in case (e.g., 512 or 1024 or 2048).
@@ -124,7 +122,6 @@ class PegasusConfig(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=0,
-        classifier_dropout=0.0,
         scale_embedding=False,
         pad_token_id=0,
         eos_token_id=1,
@@ -147,7 +144,6 @@ class PegasusConfig(PretrainedConfig):
         self.init_std = init_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
src/transformers/models/speech_to_text_2/configuration_speech_to_text_2.py

@@ -60,8 +60,6 @@ class Speech2Text2Config(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (`float`, *optional*, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (`float`, *optional*, defaults to 0.0):
-            The dropout ratio for classifier.
         init_std (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
             https://arxiv.org/abs/1909.11556>`__ for more details.
@@ -109,7 +107,6 @@ class Speech2Text2Config(PretrainedConfig):
         activation_dropout=0.0,
         init_std=0.02,
         decoder_start_token_id=2,
-        classifier_dropout=0.0,
         scale_embedding=True,
         pad_token_id=1,
         bos_token_id=0,
@@ -129,7 +126,6 @@ class Speech2Text2Config(PretrainedConfig):
         self.activation_function = activation_function
         self.init_std = init_std
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.use_cache = use_cache
         self.num_hidden_layers = decoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
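One backward-compatibility note, offered as an assumption about `PretrainedConfig` rather than as something this diff guarantees: callers that still pass `classifier_dropout` explicitly should keep working, because the argument now falls through to `**kwargs` and `PretrainedConfig.__init__` appears to store unrecognized keyword arguments as plain attributes; only the silently stored default goes away.

    # Assumption: PretrainedConfig keeps unknown kwargs as plain attributes.
    from transformers import MarianConfig

    config = MarianConfig(classifier_dropout=0.1)       # still accepted via **kwargs
    print(getattr(config, "classifier_dropout", None))  # expected: 0.1, but unused by the model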