Unverified Commit e02a8065 authored by dumpmemory's avatar dumpmemory Committed by GitHub
Browse files

make opt checkpoint dir name correct (#21660)

make opt checkpoint dir name correct following https://github.com/huggingface/Megatron-LM/blob/100b522bb8044d98413398f9e71563af15b83325/megatron/checkpointing.py#L117
parent 7f919509
...@@ -690,9 +690,9 @@ def convert_checkpoint_from_transformers_to_megatron(args): ...@@ -690,9 +690,9 @@ def convert_checkpoint_from_transformers_to_megatron(args):
for j in range(args.target_tensor_model_parallel_size): for j in range(args.target_tensor_model_parallel_size):
for k in range(args.target_data_parallel_size): for k in range(args.target_data_parallel_size):
if args.target_pipeline_model_parallel_size == 1: if args.target_pipeline_model_parallel_size == 1:
checkpoint_dir = f"mp_rank_{i:02d}_{k:03d}" checkpoint_dir = f"mp_rank_{j:02d}_{i:03d}"
else: else:
checkpoint_dir = f"mp_rank_{i:02d}_{j:03d}_{k:03d}" checkpoint_dir = f"mp_rank_{j:02d}_{i:03d}_{k:03d}"
checkpoint_dir = os.path.join(release_dir, checkpoint_dir) checkpoint_dir = os.path.join(release_dir, checkpoint_dir)
os.makedirs(checkpoint_dir, exist_ok=True) os.makedirs(checkpoint_dir, exist_ok=True)
torch.save( torch.save(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment