Commit 1f8bc276 authored by MaximumEntropy's avatar MaximumEntropy
Browse files

Merge branch 'untie_embeddings' of...

Merge branch 'untie_embeddings' of https://gitlab-master.nvidia.com/ADLR/megatron-lm into untie_embeddings
parents 96f4c5d2 6b50a8c6
...@@ -328,6 +328,7 @@ class TransformerLanguageModel(MegatronModule):
post_process=True):
args = get_args()
# TODO: passing share_word_embeddings=False will not work correctly for T5 and embeddings will not be synced. Fix later for T5.
if args.untie_embeddings_and_output_weights: assert not add_decoder
super(TransformerLanguageModel, self).__init__(share_word_embeddings=not args.untie_embeddings_and_output_weights)
self.pre_process = pre_process
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment