Commit 66f89332 (unverified)
Authored Jun 15, 2022 by Stas Bekman; committed by GitHub on Jun 15, 2022

normalize keys_to_ignore (#17722)

Parent: c3c62b5d
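The change itself is mechanical: in the various keys-to-ignore lists, escaped dots (\.) are replaced with plain dots. Because these entries are treated as regular expressions when filtering load-time warnings, an unescaped dot still matches a literal dot (it matches any character), so the normalized patterns accept the same checkpoint keys as before. A minimal illustration (the key name below is hypothetical, purely for demonstration):

import re

key = "model.decoder.version"  # hypothetical checkpoint key, for illustration only
assert re.search(r"decoder\.version", key)  # old, escaped form matches
assert re.search(r"decoder.version", key)   # new, normalized form matches as well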
Changes (23): showing 20 changed files with 57 additions and 57 deletions (+57 / -57); the remaining changed files are on page 2.
src/transformers/models/bart/modeling_bart.py  +2 -2
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py  +1 -1
src/transformers/models/blenderbot/modeling_blenderbot.py  +3 -3
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py  +3 -3
src/transformers/models/convbert/modeling_convbert.py  +1 -1
src/transformers/models/electra/modeling_electra.py  +1 -1
src/transformers/models/gpt2/modeling_gpt2.py  +1 -1
src/transformers/models/gpt_neo/modeling_gpt_neo.py  +2 -2
src/transformers/models/gptj/modeling_gptj.py  +2 -2
src/transformers/models/imagegpt/modeling_imagegpt.py  +1 -1
src/transformers/models/led/modeling_led.py  +3 -3
src/transformers/models/longt5/modeling_longt5.py  +8 -8
src/transformers/models/m2m_100/modeling_m2m_100.py  +3 -3
src/transformers/models/marian/modeling_marian.py  +3 -3
src/transformers/models/mbart/modeling_mbart.py  +3 -3
src/transformers/models/mt5/modeling_mt5.py  +9 -9
src/transformers/models/opt/modeling_opt.py  +2 -2
src/transformers/models/pegasus/modeling_pegasus.py  +4 -4
src/transformers/models/plbart/modeling_plbart.py  +3 -3
src/transformers/models/roformer/modeling_roformer.py  +2 -2
src/transformers/models/bart/modeling_bart.py  (view file @ 66f89332)

@@ -497,7 +497,7 @@ class BartPretrainedModel(PreTrainedModel):
     config_class = BartConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
-    _keys_to_ignore_on_load_unexpected = [r"encoder\.version", r"decoder\.version"]
+    _keys_to_ignore_on_load_unexpected = [r"encoder.version", r"decoder.version"]

     def _init_weights(self, module):
         std = self.config.init_std
@@ -1272,7 +1272,7 @@ class BartModel(BartPretrainedModel):
 )
 class BartForConditionalGeneration(BartPretrainedModel):
     base_model_prefix = "model"
-    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head.weight"]

     def __init__(self, config: BartConfig):
         super().__init__(config)
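For context on how these attributes are consumed: when a checkpoint is loaded with from_pretrained, keys that are missing from the checkpoint or unexpected by the model are normally reported, but any key matching one of the ignore patterns is filtered out of that report first. A minimal sketch of that filtering, assuming a simple helper (names here are illustrative, not the exact transformers internals):

import re
from typing import List

def filter_ignored(keys: List[str], patterns: List[str]) -> List[str]:
    # Drop every key that matches one of the ignore patterns; only the
    # remaining keys would be surfaced in the missing/unexpected warning.
    return [k for k in keys if not any(re.search(p, k) for p in patterns)]

# Illustrative use with BART's list after this commit:
missing = ["final_logits_bias", "lm_head.weight", "model.shared.weight"]
print(filter_ignored(missing, [r"final_logits_bias", r"lm_head.weight"]))
# -> ['model.shared.weight']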
src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py  (view file @ 66f89332)

@@ -2476,7 +2476,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForConditionalGeneration with Bart->BigBirdPegasus, BART->BIGBIRD_PEGASUS
 class BigBirdPegasusForConditionalGeneration(BigBirdPegasusPreTrainedModel):
     base_model_prefix = "model"
-    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head.weight"]

     def __init__(self, config: BigBirdPegasusConfig):
         super().__init__(config)
src/transformers/models/blenderbot/modeling_blenderbot.py  (view file @ 66f89332)

@@ -1226,9 +1226,9 @@ class BlenderbotForConditionalGeneration(BlenderbotPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: BlenderbotConfig):
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py  (view file @ 66f89332)

@@ -1208,9 +1208,9 @@ class BlenderbotSmallForConditionalGeneration(BlenderbotSmallPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: BlenderbotSmallConfig):
src/transformers/models/convbert/modeling_convbert.py  (view file @ 66f89332)

@@ -251,7 +251,7 @@ class ConvBertPreTrainedModel(PreTrainedModel):
     base_model_prefix = "convbert"
     supports_gradient_checkpointing = True
     authorized_missing_keys = [r"position_ids"]
-    authorized_unexpected_keys = [r"convbert\.embeddings_project\.weight", r"convbert\.embeddings_project\.bias"]
+    authorized_unexpected_keys = [r"convbert.embeddings_project.weight", r"convbert.embeddings_project.bias"]

     def _init_weights(self, module):
         """Initialize the weights"""
src/transformers/models/electra/modeling_electra.py  (view file @ 66f89332)

@@ -670,7 +670,7 @@ class ElectraPreTrainedModel(PreTrainedModel):
     base_model_prefix = "electra"
     supports_gradient_checkpointing = True
     _keys_to_ignore_on_load_missing = [r"position_ids"]
-    _keys_to_ignore_on_load_unexpected = [r"electra\.embeddings_project\.weight", r"electra\.embeddings_project\.bias"]
+    _keys_to_ignore_on_load_unexpected = [r"electra.embeddings_project.weight", r"electra.embeddings_project.bias"]

     # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights
     def _init_weights(self, module):
src/transformers/models/gpt2/modeling_gpt2.py  (view file @ 66f89332)

@@ -1328,7 +1328,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     GPT2_START_DOCSTRING,
 )
 class GPT2ForSequenceClassification(GPT2PreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
src/transformers/models/gpt_neo/modeling_gpt_neo.py  (view file @ 66f89332)

@@ -661,7 +661,7 @@ class GPTNeoModel(GPTNeoPreTrainedModel):
 class GPTNeoForCausalLM(GPTNeoPreTrainedModel):
     _keys_to_ignore_on_load_missing = [
         r"h\.\d+\.attn\.masked_bias",
-        r"lm_head\.weight",
+        r"lm_head.weight",
         r"h\.\d+\.attn\.attention\.bias",
     ]
     _keys_to_ignore_on_save = [r"lm_head.weight"]
@@ -812,7 +812,7 @@ class GPTNeoForCausalLM(GPTNeoPreTrainedModel):
     GPT_NEO_START_DOCSTRING,
 )
 class GPTNeoForSequenceClassification(GPTNeoPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
src/transformers/models/gptj/modeling_gptj.py  (view file @ 66f89332)

@@ -890,7 +890,7 @@ class GPTJForCausalLM(GPTJPreTrainedModel):
     GPTJ_START_DOCSTRING,
 )
 class GPTJForSequenceClassification(GPTJPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
@@ -1017,7 +1017,7 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
     GPTJ_START_DOCSTRING,
 )
 class GPTJForQuestionAnswering(GPTJPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
src/transformers/models/imagegpt/modeling_imagegpt.py  (view file @ 66f89332)

@@ -1087,7 +1087,7 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
     IMAGEGPT_START_DOCSTRING,
 )
 class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config: ImageGPTConfig):
         super().__init__(config)
src/transformers/models/led/modeling_led.py  (view file @ 66f89332)

@@ -2298,9 +2298,9 @@ class LEDForConditionalGeneration(LEDPreTrainedModel):
     base_model_prefix = "led"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: LEDConfig):
src/transformers/models/longt5/modeling_longt5.py  (view file @ 66f89332)

@@ -1721,11 +1721,11 @@ num_heads)`.
 )
 class LongT5Model(LongT5PreTrainedModel):
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_load_unexpected = [
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]

     def __init__(self, config: LongT5Config):
@@ -1874,12 +1874,12 @@ class LongT5Model(LongT5PreTrainedModel):
 @add_start_docstrings("""LONGT5 Model with a `language modeling` head on top.""", LONGT5_START_DOCSTRING)
 class LongT5ForConditionalGeneration(LongT5PreTrainedModel):
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
-        r"lm_head\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
+        r"lm_head.weight",
     ]
     _keys_to_ignore_on_load_unexpected = [
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]

     def __init__(self, config: LongT5Config):
@@ -2116,7 +2116,7 @@ class LongT5ForConditionalGeneration(LongT5PreTrainedModel):
 )
 class LongT5EncoderModel(LongT5PreTrainedModel):
     authorized_missing_keys = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]

     def __init__(self, config: LongT5Config):
src/transformers/models/m2m_100/modeling_m2m_100.py  (view file @ 66f89332)

@@ -1241,9 +1241,9 @@ class M2M100Model(M2M100PreTrainedModel):
 class M2M100ForConditionalGeneration(M2M100PreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
         r"model.encoder.embed_positions.weights",
         r"model.decoder.embed_positions.weights",
     ]
src/transformers/models/marian/modeling_marian.py  (view file @ 66f89332)

@@ -1272,9 +1272,9 @@ class MarianMTModel(MarianPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
         r"embed_positions",
     ]
src/transformers/models/mbart/modeling_mbart.py  (view file @ 66f89332)

@@ -1263,9 +1263,9 @@ class MBartForConditionalGeneration(MBartPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: MBartConfig):
src/transformers/models/mt5/modeling_mt5.py  (view file @ 66f89332)

@@ -49,13 +49,13 @@ class MT5Model(T5Model):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
     ]
@@ -84,10 +84,10 @@ class MT5ForConditionalGeneration(T5ForConditionalGeneration):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
@@ -112,8 +112,8 @@ class MT5EncoderModel(T5EncoderModel):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
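The MT5 classes also normalize _keys_to_ignore_on_save, which acts at save time rather than load time: the listed entries (tied embedding weights here) are left out of the serialized state dict and re-tied when the model is loaded again. A rough sketch of the idea, assuming simple exact-name filtering (the library's own matching details may differ):

def drop_ignored_on_save(state_dict: dict, ignore: list) -> dict:
    # Skip tied/duplicated tensors such as "encoder.embed_tokens.weight" so
    # they are not written to disk twice; they are reconstructed on load.
    return {name: tensor for name, tensor in state_dict.items() if name not in ignore}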
src/transformers/models/opt/modeling_opt.py  (view file @ 66f89332)

@@ -382,7 +382,7 @@ class OPTPreTrainedModel(PreTrainedModel):
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["OPTDecoderLayer"]
-    _keys_to_ignore_on_load_unexpected = [r"decoder\.version"]
+    _keys_to_ignore_on_load_unexpected = [r"decoder.version"]

     def _init_weights(self, module):
         std = self.config.init_std
@@ -780,7 +780,7 @@ class OPTModel(OPTPreTrainedModel):
 class OPTForCausalLM(OPTPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
src/transformers/models/pegasus/modeling_pegasus.py  (view file @ 66f89332)

@@ -1290,10 +1290,10 @@ class PegasusForConditionalGeneration(PegasusPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
-        r"embed_positions\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
+        r"embed_positions.weight",
     ]

     def __init__(self, config: PegasusConfig):
src/transformers/models/plbart/modeling_plbart.py  (view file @ 66f89332)

@@ -1235,9 +1235,9 @@ class PLBartForConditionalGeneration(PLBartPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: PLBartConfig):
src/transformers/models/roformer/modeling_roformer.py  (view file @ 66f89332)

@@ -699,8 +699,8 @@ class RoFormerPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _keys_to_ignore_on_load_missing = []
     _keys_to_ignore_on_load_unexpected = [
-        r"roformer\.embeddings_project\.weight",
-        r"roformer\.embeddings_project\.bias",
+        r"roformer.embeddings_project.weight",
+        r"roformer.embeddings_project.bias",
     ]

     def _init_weights(self, module):