Commit 01dffdb5 authored by Vijay Korthikanti
Browse files

typo fix

parent 0bc75448
@@ -218,7 +218,7 @@ class ParallelSelfAttention(MegatronModule):
 if checkpoint_version == 0:
     # [s, b, (3 * np * hn)] --> [s, b, (np * 3 * hn)]
     mixed_x_layer = self._transpose_last_dim(mixed_x_layer, 3, True)
-elif checkpoint_version == 1:
+elif checkpoint_version == 1.0:
     # [s, b, (np * hn * 3)] --> [s, b, (np * 3 * hn)]
     mixed_x_layer = self._transpose_last_dim(mixed_x_layer, 3, False)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment