#!/bin/bash
#SBATCH -J test
#SBATCH -p wzhdexclu03
#SBATCH -N 1
##SBATCH -n 32
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=dcu:1

# Slurm batch script.
#
# Steps, in order:
#   1. Activate the conda environment and load the compiler/MPI/DTK modules.
#   2. Run driver.py with unbuffered output.
#   3. Dump the job environment to env_<jobid> for later inspection.
#   4. Write an MPI hostfile (one slot per allocated node) and launch
#      ./single_process.sh once per node through mpirun, passing the node
#      count and the first node's name as arguments.
#
# Each "#SBATCH" directive must sit on its own line, ahead of the first
# executable command, to be honoured by sbatch. The "##SBATCH -n 32" line is
# intentionally disabled (double '#').

# --- Software environment ---------------------------------------------------
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk22.10-py38
#conda activate base

module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04
module list

# --- Single-process run -----------------------------------------------------
# NOTE(review): this runs to completion before the MPI launch below and before
# LD_LIBRARY_PATH is extended -- confirm that both launches are intended.
python -u driver.py  # the program to run

# --- Multi-node MPI launch --------------------------------------------------
# UCX_HOME is presumably exported by one of the modules loaded above -- verify.
# The ${VAR:+...} form avoids a trailing ':' (which the dynamic loader treats
# as "current directory") when LD_LIBRARY_PATH is empty or unset.
export LD_LIBRARY_PATH="${UCX_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Snapshot of the job environment, useful when debugging a failed run.
env > "env_${SLURM_JOBID}"

# Expand the compact Slurm node list into one array element per host name.
# Word splitting of the nodeset output is intentional here.
# shellcheck disable=SC2207
node_list=($(nodeset -e "${SLURM_NODELIST}"))
master_node=${node_list[0]}

# Write the hostfile in a single truncating redirect so that a requeued job
# (which keeps its job ID) does not accumulate duplicate entries.
hostfile="hostfile-${SLURM_JOB_ID}"
for node in "${node_list[@]}"; do
  printf '%s slots=1\n' "${node}"
done > "${hostfile}"

# Build the launch command once so the logged command and the executed
# command cannot drift apart.
launch_cmd=(
  mpirun -np "${SLURM_NNODES}" --hostfile "${hostfile}"
  ./single_process.sh "${SLURM_NNODES}" "${master_node}"
)
echo "${launch_cmd[*]}"
"${launch_cmd[@]}"