#!/usr/bin/env bash
#
# Launch multi-node training through mpirun.
#
# Reads the list of nodes from ./hostfile, derives the total rank count,
# takes the first host as the rendezvous address (dist_url), and starts
# single_process.sh (resolved against the current working directory) once
# per rank.
#
# Exit status: 1 if the hostfile is missing/empty, otherwise mpirun's status.

# Raise the per-user process limit for this shell and its children.
# A failure here is reported by the shell but does not stop the script.
ulimit -u 200000

echo "START TIME: $(date)"

hostfile=./hostfile
if [[ ! -s "$hostfile" ]]; then
  printf 'error: hostfile %s is missing or empty\n' "$hostfile" >&2
  exit 1
fi

# Total ranks = number of distinct hostfile lines * 8.
# NOTE(review): the multiplier 8 is presumably the GPU count per node, but
# NPROC_PER_NODE below is 4 -- confirm which value is intended.
np=$(sort -u -- "$hostfile" | wc -l)
np=$((np * 8))
echo "$np"

# First whitespace-separated field of the first hostfile line.
nodename=$(sed -n '1p' -- "$hostfile")
dist_url=$(printf '%s\n' "$nodename" | awk '{print $1}')
# mpirun -x forwards variables from its *environment*, so dist_url must be
# exported; a plain shell variable would not reach the ranks.
export dist_url

# Show which mpirun will be used (diagnostic only).
command -v mpirun

# Add the Python library directory; users must replace the placeholder path
# with the lib directory of their own conda environment.
# The ':+' form avoids a leading ':' (an empty entry, i.e. the current
# directory) when LD_LIBRARY_PATH is not already set.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/path/of/conda/envs/{env_name}/lib"
export PYTHON=python3
export NPROC_PER_NODE=4

# Use mpirun for multi-GPU training, starting one single-threaded process
# per GPU.
#   -np  number of GPUs (total ranks)
#   -x   pass the named variable through to single_process.sh
status=0
mpirun -np "$np" --allow-run-as-root --hostfile "$hostfile" --bind-to none \
  -x dist_url -x PYTHON -x NPROC_PER_NODE \
  "$(pwd)/single_process.sh" || status=$?

echo "END TIME: $(date)"

# Propagate the launcher's result so callers/schedulers can detect failure.
exit "$status"