"src/vscode:/vscode.git/clone" did not exist on "800739361421dc559230faaec25ab6363f9ebf26"
Commit 834d6dd5 authored by Vijay Korthikanti
Browse files

minor fixes

parent 4b506832
......@@ -595,7 +595,7 @@ class TransformerLanguageModelLastStage(TransformerLanguageModelBase):
return super(TransformerLanguageModelLastStage, self).forward(
hidden_states,
enc_attention_mask,
- dec_language_input=(dec_input_ids, dec_position_ids),
+ dec_language_model_input=(dec_input_ids, dec_position_ids),
dec_attn_mask=dec_attn_mask,
enc_dec_attn_mask=enc_dec_attn_mask,
layer_past=layer_past,
......
......@@ -673,6 +673,9 @@ class ParallelTransformer(MegatronModule):
else:
hidden_states = hidden_states.transpose(0, 1).contiguous()
if encoder_output is not None:
encoder_output = encoder_output.transpose(0, 1).contiguous()
if self.checkpoint_activations:
hidden_states = self._checkpointed_forward(hidden_states,
attention_mask,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment