Commit bccfa7d0 authored by Myle Ott, committed by Facebook Github Bot
Browse files

Add fallback for SLURM config

Summary: Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/752

Differential Revision: D16417582

Pulled By: myleott

fbshipit-source-id: 6b4289febcf9290452bb91f1f2181a02c09c82a7
parent 47fd9852
...@@ -48,7 +48,14 @@ def infer_init_method(args): ...@@ -48,7 +48,14 @@ def infer_init_method(args):
port=args.distributed_port, port=args.distributed_port,
) )
nnodes = int(os.environ.get('SLURM_NNODES')) nnodes = int(os.environ.get('SLURM_NNODES'))
ntasks_per_node = int(os.environ.get('SLURM_NTASKS_PER_NODE')) ntasks_per_node = os.environ.get('SLURM_NTASKS_PER_NODE')
if ntasks_per_node is not None:
ntasks_per_node = int(ntasks_per_node)
else:
ntasks = int(os.environ.get('SLURM_NTASKS'))
nnodes = int(os.environ.get('SLURM_NNODES'))
assert ntasks % nnodes == 0
ntasks_per_node = int(ntasks / nnodes)
if ntasks_per_node == 1: if ntasks_per_node == 1:
assert args.distributed_world_size % nnodes == 0 assert args.distributed_world_size % nnodes == 0
gpus_per_node = args.distributed_world_size // nnodes gpus_per_node = args.distributed_world_size // nnodes
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment