"...resources/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "41d96cd89acbe5fa77a9ac87516ff1a4c9adb384"
Unverified Commit 66f89332 authored by Stas Bekman, committed by GitHub

normalize keys_to_ignore (#17722)

parent c3c62b5d
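
The `_keys_to_ignore_on_load_missing` / `_keys_to_ignore_on_load_unexpected` class attributes hold regular-expression patterns that are matched against checkpoint key names during loading, to suppress warnings about weights that are legitimately absent or extraneous. Because the strings are regexes, an unescaped `.` matches any character, including a literal dot, so dropping the `\.` escapes before plain attribute names changes nothing in practice. Roughly how the patterns are consumed (a minimal sketch under that assumption; `filter_ignored` is an illustrative name, the real logic lives inside `PreTrainedModel.from_pretrained`):

import re

# Minimal sketch: drop every key that matches one of the ignore patterns.
# The actual filtering happens inside transformers' model-loading code.
def filter_ignored(keys, patterns):
    return [k for k in keys if not any(re.search(p, k) for p in patterns)]

missing = ["final_logits_bias", "lm_head.weight", "model.shared.weight"]
print(filter_ignored(missing, [r"final_logits_bias", r"lm_head.weight"]))
# -> ['model.shared.weight']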
@@ -497,7 +497,7 @@ class BartPretrainedModel(PreTrainedModel):
     config_class = BartConfig
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
-    _keys_to_ignore_on_load_unexpected = [r"encoder\.version", r"decoder\.version"]
+    _keys_to_ignore_on_load_unexpected = [r"encoder.version", r"decoder.version"]

     def _init_weights(self, module):
         std = self.config.init_std
@@ -1272,7 +1272,7 @@ class BartModel(BartPretrainedModel):
 )
 class BartForConditionalGeneration(BartPretrainedModel):
     base_model_prefix = "model"
-    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head.weight"]

     def __init__(self, config: BartConfig):
         super().__init__(config)
...
@@ -2476,7 +2476,7 @@ class BigBirdPegasusModel(BigBirdPegasusPreTrainedModel):
 # Copied from transformers.models.bart.modeling_bart.BartForConditionalGeneration with Bart->BigBirdPegasus, BART->BIGBIRD_PEGASUS
 class BigBirdPegasusForConditionalGeneration(BigBirdPegasusPreTrainedModel):
     base_model_prefix = "model"
-    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"final_logits_bias", r"lm_head.weight"]

     def __init__(self, config: BigBirdPegasusConfig):
         super().__init__(config)
...
@@ -1226,9 +1226,9 @@ class BlenderbotForConditionalGeneration(BlenderbotPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: BlenderbotConfig):
...
@@ -1208,9 +1208,9 @@ class BlenderbotSmallForConditionalGeneration(BlenderbotSmallPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: BlenderbotSmallConfig):
...
@@ -251,7 +251,7 @@ class ConvBertPreTrainedModel(PreTrainedModel):
     base_model_prefix = "convbert"
     supports_gradient_checkpointing = True
     authorized_missing_keys = [r"position_ids"]
-    authorized_unexpected_keys = [r"convbert\.embeddings_project\.weight", r"convbert\.embeddings_project\.bias"]
+    authorized_unexpected_keys = [r"convbert.embeddings_project.weight", r"convbert.embeddings_project.bias"]

     def _init_weights(self, module):
         """Initialize the weights"""
...
@@ -670,7 +670,7 @@ class ElectraPreTrainedModel(PreTrainedModel):
     base_model_prefix = "electra"
     supports_gradient_checkpointing = True
     _keys_to_ignore_on_load_missing = [r"position_ids"]
-    _keys_to_ignore_on_load_unexpected = [r"electra\.embeddings_project\.weight", r"electra\.embeddings_project\.bias"]
+    _keys_to_ignore_on_load_unexpected = [r"electra.embeddings_project.weight", r"electra.embeddings_project.bias"]

     # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights
     def _init_weights(self, module):
...
@@ -1328,7 +1328,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     GPT2_START_DOCSTRING,
 )
 class GPT2ForSequenceClassification(GPT2PreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
...
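Note that only the escapes in front of literal name segments are dropped. Patterns that rely on real regex machinery, such as `h\.\d+\.attn\.masked_bias` above (which matches that bias in every transformer layer), keep their escaping so that `\d+` remains unambiguous. A quick illustration:

import re

# \d+ matches any layer index, while the escaped dots stay literal.
pattern = r"h\.\d+\.attn\.masked_bias"
for key in ["h.0.attn.masked_bias", "h.11.attn.masked_bias", "h.0.attn.bias"]:
    print(key, bool(re.search(pattern, key)))
# h.0.attn.masked_bias True
# h.11.attn.masked_bias True
# h.0.attn.bias False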
@@ -661,7 +661,7 @@ class GPTNeoModel(GPTNeoPreTrainedModel):
 class GPTNeoForCausalLM(GPTNeoPreTrainedModel):
     _keys_to_ignore_on_load_missing = [
         r"h\.\d+\.attn\.masked_bias",
-        r"lm_head\.weight",
+        r"lm_head.weight",
         r"h\.\d+\.attn\.attention\.bias",
     ]
     _keys_to_ignore_on_save = [r"lm_head.weight"]
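`_keys_to_ignore_on_save` (already unescaped here, hence untouched by this commit) works in the opposite direction: matching keys are excluded from the state dict when a checkpoint is written, typically because the tensor is tied, e.g. `lm_head.weight` sharing storage with the input embeddings. A hedged sketch of that save-side filtering (illustrative names, not the exact library code):

import re

# Illustrative only: drop tied/duplicated tensors before serialization.
def strip_keys_on_save(state_dict, patterns=(r"lm_head.weight",)):
    return {k: v for k, v in state_dict.items()
            if not any(re.search(p, k) for p in patterns)}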
@@ -812,7 +812,7 @@ class GPTNeoForCausalLM(GPTNeoPreTrainedModel):
     GPT_NEO_START_DOCSTRING,
 )
 class GPTNeoForSequenceClassification(GPTNeoPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
...
@@ -890,7 +890,7 @@ class GPTJForCausalLM(GPTJPreTrainedModel):
     GPTJ_START_DOCSTRING,
 )
 class GPTJForSequenceClassification(GPTJPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
@@ -1017,7 +1017,7 @@ class GPTJForSequenceClassification(GPTJPreTrainedModel):
     GPTJ_START_DOCSTRING,
 )
 class GPTJForQuestionAnswering(GPTJPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"h\.\d+\.attn\.bias", r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
...
@@ -1087,7 +1087,7 @@ class ImageGPTForCausalImageModeling(ImageGPTPreTrainedModel):
     IMAGEGPT_START_DOCSTRING,
 )
 class ImageGPTForImageClassification(ImageGPTPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias", r"lm_head.weight"]

     def __init__(self, config: ImageGPTConfig):
         super().__init__(config)
...
@@ -2298,9 +2298,9 @@ class LEDForConditionalGeneration(LEDPreTrainedModel):
     base_model_prefix = "led"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: LEDConfig):
...
@@ -1721,11 +1721,11 @@ num_heads)`.
 )
 class LongT5Model(LongT5PreTrainedModel):
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_load_unexpected = [
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]

     def __init__(self, config: LongT5Config):
@@ -1874,12 +1874,12 @@ class LongT5Model(LongT5PreTrainedModel):
 @add_start_docstrings("""LONGT5 Model with a `language modeling` head on top.""", LONGT5_START_DOCSTRING)
 class LongT5ForConditionalGeneration(LongT5PreTrainedModel):
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
-        r"lm_head\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
+        r"lm_head.weight",
     ]
     _keys_to_ignore_on_load_unexpected = [
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]

     def __init__(self, config: LongT5Config):
@@ -2116,7 +2116,7 @@ class LongT5ForConditionalGeneration(LongT5PreTrainedModel):
 )
 class LongT5EncoderModel(LongT5PreTrainedModel):
     authorized_missing_keys = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]

     def __init__(self, config: LongT5Config):
...
@@ -1241,9 +1241,9 @@ class M2M100Model(M2M100PreTrainedModel):
 class M2M100ForConditionalGeneration(M2M100PreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
         r"model.encoder.embed_positions.weights",
         r"model.decoder.embed_positions.weights",
     ]
...
@@ -1272,9 +1272,9 @@ class MarianMTModel(MarianPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
         r"embed_positions",
     ]
...
@@ -1263,9 +1263,9 @@ class MBartForConditionalGeneration(MBartPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: MBartConfig):
...
@@ -49,13 +49,13 @@ class MT5Model(T5Model):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
-        r"decoder\.block\.0\.layer\.1\.EncDecAttention\.relative_attention_bias\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
+        r"decoder.block.0.layer.1.EncDecAttention.relative_attention_bias.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
-        r"decoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
+        r"decoder.embed_tokens.weight",
     ]
@@ -84,10 +84,10 @@ class MT5ForConditionalGeneration(T5ForConditionalGeneration):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
@@ -112,8 +112,8 @@ class MT5EncoderModel(T5EncoderModel):
     model_type = "mt5"
     config_class = MT5Config
     _keys_to_ignore_on_load_missing = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
     _keys_to_ignore_on_save = [
-        r"encoder\.embed_tokens\.weight",
+        r"encoder.embed_tokens.weight",
     ]
@@ -382,7 +382,7 @@ class OPTPreTrainedModel(PreTrainedModel):
     base_model_prefix = "model"
     supports_gradient_checkpointing = True
     _no_split_modules = ["OPTDecoderLayer"]
-    _keys_to_ignore_on_load_unexpected = [r"decoder\.version"]
+    _keys_to_ignore_on_load_unexpected = [r"decoder.version"]

     def _init_weights(self, module):
         std = self.config.init_std
@@ -780,7 +780,7 @@ class OPTModel(OPTPreTrainedModel):
 class OPTForCausalLM(OPTPreTrainedModel):
-    _keys_to_ignore_on_load_missing = [r"lm_head\.weight"]
+    _keys_to_ignore_on_load_missing = [r"lm_head.weight"]

     def __init__(self, config):
         super().__init__(config)
...
@@ -1290,10 +1290,10 @@ class PegasusForConditionalGeneration(PegasusPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
-        r"embed_positions\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
+        r"embed_positions.weight",
     ]

     def __init__(self, config: PegasusConfig):
...
@@ -1235,9 +1235,9 @@ class PLBartForConditionalGeneration(PLBartPreTrainedModel):
     base_model_prefix = "model"
     _keys_to_ignore_on_load_missing = [
         r"final_logits_bias",
-        r"encoder\.version",
-        r"decoder\.version",
-        r"lm_head\.weight",
+        r"encoder.version",
+        r"decoder.version",
+        r"lm_head.weight",
     ]

     def __init__(self, config: PLBartConfig):
...
@@ -699,8 +699,8 @@ class RoFormerPreTrainedModel(PreTrainedModel):
     supports_gradient_checkpointing = True
     _keys_to_ignore_on_load_missing = []
     _keys_to_ignore_on_load_unexpected = [
-        r"roformer\.embeddings_project\.weight",
-        r"roformer\.embeddings_project\.bias",
+        r"roformer.embeddings_project.weight",
+        r"roformer.embeddings_project.bias",
     ]

     def _init_weights(self, module):
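As a sanity check that the normalization is behavior-preserving: both spellings accept the real key names, the unescaped form merely being slightly more permissive, since `.` also matches characters other than a dot, which is harmless for these fixed parameter names:

import re

key = "model.decoder.version"
print(bool(re.search(r"decoder\.version", key)))               # True
print(bool(re.search(r"decoder.version", key)))                # True
# The unescaped form is looser, though no real checkpoint key triggers this:
print(bool(re.search(r"decoder.version", "decoderXversion")))  # True (looser)
print(bool(re.search(r"decoder\.version", "decoderXversion"))) # False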