Unverified Commit bb6a664e authored by Yoshinari Fujinuma, committed by GitHub

Fix multi-gpu training error for LayoutLMv2 (#21675)


Co-authored-by: Yoshinari Fujinuma <fujinuy@amazon.com>
parent a8eb4f79
@@ -604,7 +604,7 @@ class LayoutLMv2VisualBackbone(nn.Module):
         self_rank = torch.distributed.get_rank()
         node_size = torch.cuda.device_count()
         world_size = torch.distributed.get_world_size()
-        if not (world_size & node_size == 0):
+        if not (world_size % node_size == 0):
             raise RuntimeError("Make sure the number of processes can be divided by the number of nodes")
 
         node_global_ranks = [list(range(i * node_size, (i + 1) * node_size)) for i in range(world_size // node_size)]
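For context (not part of the commit): a minimal sketch of why the original check misfires, using hypothetical values for a single node with 8 GPUs. In Python, `&` is bitwise AND and binds tighter than `==`, so the old guard evaluates `(world_size & node_size) == 0` instead of a divisibility test, and the surrounding `if not (...)` raises RuntimeError on a perfectly valid multi-GPU setup; `%` performs the intended check.

world_size, node_size = 8, 8  # hypothetical: one node with eight GPUs

# Old check: (8 & 8) == 0 -> 8 == 0 -> False, so `if not (...)` raises
# RuntimeError even though the process count divides evenly across nodes.
print((world_size & node_size) == 0)   # False

# Fixed check: 8 % 8 == 0 -> True, so the guard passes as intended.
print(world_size % node_size == 0)     # True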