Unverified commit bd3db7f4, authored by Ning Xie, committed by GitHub
Browse files

[Misc] log more detailed message for ensure_model_parallel_initialized (#22144)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 29b97c09
......@@ -1125,14 +1125,14 @@ def ensure_model_parallel_initialized(
assert (
get_tensor_model_parallel_world_size() == tensor_model_parallel_size
), ("tensor parallel group already initialized, but of unexpected size: "
f"{get_tensor_model_parallel_world_size()=} vs. "
f"{tensor_model_parallel_size=}")
), ("tensor parallel group already initialized, but of unexpected size. "
f"got: {get_tensor_model_parallel_world_size()=} vs. "
f"wanted: {tensor_model_parallel_size=}")
pp_world_size = get_pp_group().world_size
assert (pp_world_size == pipeline_model_parallel_size), (
"pipeline parallel group already initialized, but of unexpected size: "
f"{pp_world_size=} vs. "
f"{pipeline_model_parallel_size=}")
"pipeline parallel group already initialized, but of unexpected size. "
f"got: {pp_world_size=} vs. "
f"wanted: {pipeline_model_parallel_size=}")
def prepare_communication_buffer_for_model(model: torch.nn.Module):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment