Commit 66f7336c authored by dongcl's avatar dongcl
Browse files

bug fix: enable share_mtp_embedding_and_output_weight only when num_nextn_predict_layers > 0

parent 65f7a409
Pipeline #2576 passed with stage
...@@ -174,7 +174,7 @@ class GPTModel(LanguageModule): ...@@ -174,7 +174,7 @@ class GPTModel(LanguageModule):
# add mtp # add mtp
self.mtp_spec: ModuleSpec = mtp_spec self.mtp_spec: ModuleSpec = mtp_spec
self.num_nextn_predict_layers = num_nextn_predict_layers self.num_nextn_predict_layers = num_nextn_predict_layers
self.share_mtp_embedding_and_output_weight = share_mtp_embedding_and_output_weight self.share_mtp_embedding_and_output_weight = share_mtp_embedding_and_output_weight and self.num_nextn_predict_layers > 0
self.recompute_mtp_norm = recompute_mtp_norm self.recompute_mtp_norm = recompute_mtp_norm
self.recompute_mtp_layer = recompute_mtp_layer self.recompute_mtp_layer = recompute_mtp_layer
self.mtp_loss_scale = mtp_loss_scale self.mtp_loss_scale = mtp_loss_scale
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment