#!/usr/bin/env bash
#
# Launch run_train_single.sh under mpirun on the nodes listed in ./hostfile.
#
# The rank count is (number of unique hostfile lines) * ranks_per_node.
# START/END timestamps are printed around the run, and the script exits
# with mpirun's exit status so callers can detect a failed launch.
#
# Requires: mpirun on PATH, a readable ./hostfile in the current directory.

set -u

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Raise the per-user process limit for this shell and its children.
ulimit -u 200000

# Runtime settings; exported so processes started from this shell inherit them.
export OMP_NUM_THREADS=1
export NCCL_DEBUG=INFO
export MIOPEN_FIND_MODE=3
export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_COMPILE_PARALLEL_LEVEL=1
export NCCL_PLUGIN_P2P=ucx
export NCCL_SOCKET_IFNAME=ib0
export NCCL_P2P_LEVEL=5
export NCCL_NET_PLUGIN=none

echo "START TIME: $(date)"

hostfile=./hostfile
# Ranks started per unique hostfile line.
# NOTE(review): presumably the number of accelerators per node - confirm.
readonly ranks_per_node=8

[ -r "$hostfile" ] || die "hostfile not found or unreadable: $hostfile"

# Duplicate lines are counted once.
node_count=$(sort -u -- "$hostfile" | wc -l) \
  || die "failed to count nodes in $hostfile"
np=$((node_count * ranks_per_node))
[ "$np" -gt 0 ] || die "no nodes listed in $hostfile"

# First whitespace-separated field of the hostfile's first line.
dist_url=$(awk 'NR == 1 { print $1; exit }' "$hostfile")
# An empty value would make mpirun take the training script as the MCA value.
[ -n "$dist_url" ] || die "first line of $hostfile is empty"

# Print which mpirun will be used; abort early if there is none.
command -v mpirun || die "mpirun not found in PATH"

# NOTE(review): Open MPI documents btl_tcp_if_include as a list of interface
# names or CIDR subnets; confirm the hostfile's first field is such a value.
mpirun -np "$np" --allow-run-as-root --hostfile "$hostfile" --bind-to none \
  --mca btl_tcp_if_include "$dist_url" run_train_single.sh
status=$?

echo "END TIME: $(date)"
exit "$status"