Unverified commit 6855ba1c, authored by Jeff Rasley and committed by GitHub
Browse files

Update fan out flag for pdsh (#315)

* update fan out flag for pdsh
parent 3437342c
......@@ -21,6 +21,7 @@ DLTS_HOSTFILE = "/job/hostfile"
EXPORT_ENVS = ["NCCL", "PYTHON"]
DEEPSPEED_ENVIRONMENT_NAME = ".deepspeed_env"
DEEPSPEED_ENVIRONMENT_PATHS = [os.path.expanduser("~"), '.']
PDSH_MAX_FAN_OUT = 1024
def parse_args(args=None):
......@@ -294,7 +295,9 @@ def main(args=None):
active_workers = ",".join(active_resources.keys())
logger.info("Running on the following workers: %s" % active_workers)
pdsh_cmd_args = ['pdsh', '-w', active_workers]
# PDSH flags for max node fan out and specific hosts to launch on
# See https://linux.die.net/man/1/pdsh for flag details
pdsh_cmd_args = ['pdsh', '-f', str(PDSH_MAX_FAN_OUT), '-w', active_workers]
num_nodes = len(active_resources.keys())
num_gpus_per_node = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment