Unverified Commit 21946a8c authored by SeongBeomLEE, committed by GitHub


[fix] Change the condition of ValueError in "convert_checkpoint_from_transformers_to_megatron" (#24769)

* fix: half inference error

norm_factor remains torch.float32 after calling model.half().

So I changed it to a registered buffer, so that it is cast to torch.float16 when model.half() is called (see the sketch after this list).

* fix: pass persistent=False when registering the buffer

* run make style

* [fix] Change the condition of ValueError in convert_checkpoint_from_transformers_to_megatron

* [fix] error wording: layers -> attention heads
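A minimal sketch of why registering norm_factor as a buffer fixes the half-precision issue (the ToyAttention module below is illustrative, not the actual transformers code): plain tensor attributes are left untouched by model.half(), while registered buffers are cast along with the module's parameters, and persistent=False keeps the buffer out of the state_dict so existing checkpoints still load.

```python
import math

import torch
from torch import nn


class ToyAttention(nn.Module):
    """Illustrative module only; not the transformers implementation."""

    def __init__(self, head_dim: int = 64):
        super().__init__()
        # Plain attribute: model.half() does not touch it, so it stays torch.float32.
        self.norm_factor_attr = torch.tensor(math.sqrt(head_dim))
        # Registered buffer: follows the module's dtype/device conversions.
        # persistent=False keeps it out of the state_dict, so checkpoints are unchanged.
        self.register_buffer("norm_factor", torch.tensor(math.sqrt(head_dim)), persistent=False)


model = ToyAttention().half()
print(model.norm_factor_attr.dtype)  # torch.float32 (not cast)
print(model.norm_factor.dtype)       # torch.float16 (cast by .half())
```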
@@ -741,11 +741,18 @@ def convert_checkpoint_from_transformers_to_megatron(args):
     # Transformer layers
     print("converting transformer layers")
-    if config.num_hidden_layers % args.target_tensor_model_parallel_size != 0:
+    if config.num_attention_heads % args.target_tensor_model_parallel_size != 0:
         raise ValueError(
-            f"Number of layers ({config.num_hidden_layers}) must be divisible by number of tensor parallelism"
+            f"Number of attention heads ({config.num_attention_heads}) must be divisible by number of tensor parallelism"
             f" ({args.target_tensor_model_parallel_size})"
         )
+    if config.num_hidden_layers % args.target_pipeline_model_parallel_size != 0:
+        raise ValueError(
+            f"Number of layers ({config.num_hidden_layers}) must be divisible by number of pipeline parallelism"
+            f" ({args.target_pipeline_model_parallel_size})"
+        )
     num_layers = config.num_hidden_layers // args.target_pipeline_model_parallel_size
     layer_re = re.compile(r"transformer.h\.(\d+)\.([a-z0-9_.]+)\.([a-z]+)")
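For context, a minimal sketch of the sharding arithmetic that motivates the two checks (the config values below are hypothetical, not taken from any real model): attention heads are partitioned across tensor-parallel ranks, and hidden layers are partitioned across pipeline-parallel stages, so each count must divide evenly by the corresponding parallel size.

```python
# Hypothetical values for illustration only.
num_attention_heads = 16
num_hidden_layers = 24
target_tensor_model_parallel_size = 4     # must divide num_attention_heads
target_pipeline_model_parallel_size = 6   # must divide num_hidden_layers

# These mirror the divisibility checks added in the diff above.
assert num_attention_heads % target_tensor_model_parallel_size == 0
assert num_hidden_layers % target_pipeline_model_parallel_size == 0

heads_per_tp_rank = num_attention_heads // target_tensor_model_parallel_size     # 4 heads per tensor-parallel rank
layers_per_pp_stage = num_hidden_layers // target_pipeline_model_parallel_size   # 4 layers per pipeline stage
print(heads_per_tp_rank, layers_per_pp_stage)
```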