[M2M100] update conversion script (#17916)

9eec4e93 · Suraj Patil · GitHub · db2644b9 · 9eec4e93
Unverified commit 9eec4e93, authored Jun 28, 2022 by Suraj Patil; committed by GitHub on Jun 28, 2022.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py +2 -2

No files found.
--- a/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
+++ b/src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
@@ -44,7 +44,7 @@ def make_linear_from_emb(emb):
 def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
    m2m_100 = torch.load(checkpoint_path, map_location="cpu")
-    args = m2m_100["args"]
+    args = m2m_100["args"] or m2m_100["cfg"]["model"]
    state_dict = m2m_100["model"]
    remove_ignore_keys_(state_dict)
    vocab_size = state_dict["encoder.embed_tokens.weight"].shape[0]
@@ -69,7 +69,7 @@ def convert_fairseq_m2m100_checkpoint_from_disk(checkpoint_path):
    state_dict["shared.weight"] = state_dict["decoder.embed_tokens.weight"]
    model = M2M100ForConditionalGeneration(config)
-    model.model.load_state_dict(state_dict)
+    model.model.load_state_dict(state_dict, strict=False)
    model.lm_head = make_linear_from_emb(model.model.shared)
    return model