Commit 5b7736ca authored by Mostofa Patwary's avatar Mostofa Patwary
Browse files

addressing more comments

parent 07ffa1b7
...@@ -362,7 +362,8 @@ class TransformerLanguageModel(MegatronModule): ...@@ -362,7 +362,8 @@ class TransformerLanguageModel(MegatronModule):
self._embedding_key = 'embedding' self._embedding_key = 'embedding'
# Rotary positional embeddings # Rotary positional embeddings
self.use_rotary_position_embeddings = False self.use_rotary_position_embeddings = \
args.use_rotary_position_embeddings
if args.use_rotary_position_embeddings: if args.use_rotary_position_embeddings:
self.seq_length = args.seq_length self.seq_length = args.seq_length
rotary_dim = args.hidden_size // args.num_attention_heads \ rotary_dim = args.hidden_size // args.num_attention_heads \
...@@ -375,8 +376,6 @@ class TransformerLanguageModel(MegatronModule): ...@@ -375,8 +376,6 @@ class TransformerLanguageModel(MegatronModule):
# Wang and Komatsuzaki et al # Wang and Komatsuzaki et al
# https://github.com/kingoflolz/mesh-transformer-jax/ # https://github.com/kingoflolz/mesh-transformer-jax/
self.rotary_pos_emb = RotaryEmbedding(rotary_dim) self.rotary_pos_emb = RotaryEmbedding(rotary_dim)
self.use_rotary_position_embeddings = \
args.use_rotary_position_embeddings
# Retriever (bi-directional transformer with cross attention) # Retriever (bi-directional transformer with cross attention)
if args.retro_add_retriever: if args.retro_add_retriever:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment