"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "af1c864cdc21176a529f0aefceb99e9609a06be5"
Unverified Commit 2280880c authored by Yih-Dar, committed by GitHub

remove unused `use_cache` in config classes (#20844)



Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent d0bfdd20
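
The change itself is mechanical: the three encoder-only configs stop accepting and storing a `use_cache` attribute that their models never read. A minimal sketch of the observable effect, assuming a transformers build that includes this commit (the `hasattr` check is purely illustrative):

from transformers import CanineConfig, LiltConfig, LongformerConfig

# After this commit __init__ no longer sets the attribute, so a freshly
# constructed config should not carry it (the models never consumed it).
for config_cls in (CanineConfig, LiltConfig, LongformerConfig):
    config = config_cls()
    print(config_cls.__name__, hasattr(config, "use_cache"))  # expected: False
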
@@ -104,7 +104,6 @@ class CanineConfig(PretrainedConfig):
         type_vocab_size=16,
         initializer_range=0.02,
         layer_norm_eps=1e-12,
-        use_cache=True,
         pad_token_id=0,
         bos_token_id=0xE000,
         eos_token_id=0xE001,
@@ -128,7 +127,6 @@ class CanineConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.type_vocab_size = type_vocab_size
         self.layer_norm_eps = layer_norm_eps
-        self.use_cache = use_cache
         # Character config:
         self.downsampling_rate = downsampling_rate
...
@@ -70,9 +70,6 @@ class LiltConfig(PretrainedConfig):
             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
         classifier_dropout (`float`, *optional*):
             The dropout ratio for the classification head.
         channel_shrink_ratio (`int`, *optional*, defaults to 4):
@@ -111,7 +108,6 @@ class LiltConfig(PretrainedConfig):
         layer_norm_eps=1e-12,
         pad_token_id=0,
         position_embedding_type="absolute",
-        use_cache=True,
         classifier_dropout=None,
         channel_shrink_ratio=4,
         max_2d_position_embeddings=1024,
@@ -132,7 +128,6 @@ class LiltConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
-        self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.channel_shrink_ratio = channel_shrink_ratio
         self.max_2d_position_embeddings = max_2d_position_embeddings
@@ -92,9 +92,6 @@ class LongformerConfig(PretrainedConfig):
             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
-        use_cache (`bool`, *optional*, defaults to `True`):
-            Whether or not the model should return the last key/values attentions (not used by all models). Only
-            relevant if `config.is_decoder=True`.
         classifier_dropout (`float`, *optional*):
             The dropout ratio for the classification head.
         attention_window (`int` or `List[int]`, *optional*, defaults to 512):
@@ -137,7 +134,6 @@ class LongformerConfig(PretrainedConfig):
         initializer_range: float = 0.02,
         layer_norm_eps: float = 1e-12,
         position_embedding_type: str = "absolute",
-        use_cache: bool = True,
         classifier_dropout: float = None,
         onnx_export: bool = False,
         **kwargs
@@ -162,7 +158,6 @@ class LongformerConfig(PretrainedConfig):
         self.initializer_range = initializer_range
         self.layer_norm_eps = layer_norm_eps
         self.position_embedding_type = position_embedding_type
-        self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.onnx_export = onnx_export
...
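
The docstring context above also describes the `position_embedding_type` options these configs keep. As a small hedged sketch (value names taken from that docstring), selecting the relative variant looks like:

from transformers import LiltConfig

# "relative_key_query" corresponds to Method 4 of Huang et al. (arXiv 2009.13658),
# per the docstring shown in the diff; "absolute" remains the default.
config = LiltConfig(position_embedding_type="relative_key_query")
print(config.position_embedding_type)
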