mpi.slurm 897 Bytes
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/bin/bash
# SLURM batch script: launches one MPI rank per allocated node, each rank
# running ./single_process.sh with the node count and the master node name.
#SBATCH -J test
#SBATCH -p wzhdexclu03
#SBATCH -N 1
##SBATCH -n 32
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=32
#SBATCH --gres=dcu:1

# Activate the conda environment carrying the DTK-enabled PyTorch build.
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10-dtk22.10-py38

#conda activate base
module purge
module load compiler/devtoolset/7.3.1  mpi/hpcx/gcc-7.3.1  compiler/dtk/23.04
module list

python -u driver.py  # the program to run

# NOTE(review): LD_LIBRARY_PATH is exported only AFTER driver.py has already
# run; if driver.py needs the UCX libraries, this export should move above
# it — confirm the intended ordering.
export LD_LIBRARY_PATH="${UCX_HOME}/lib:${LD_LIBRARY_PATH}"
env > "env_${SLURM_JOBID}"

# Expand the compressed SLURM node list into an array; first node is master.
node_list=($(nodeset -e "${SLURM_NODELIST}"))
master_node=${node_list[0]}

# Build a fresh MPI hostfile, one slot per node (matches --ntasks-per-node=1).
hostfile="hostfile-${SLURM_JOB_ID}"
: > "${hostfile}"   # truncate any leftover file from a previous run
for ((i = 0; i < SLURM_NNODES; i++)); do
    echo "${node_list[$i]} slots=1" >> "${hostfile}"
done

# Log the exact launch command, then run it: one rank per node.
echo mpirun -np "${SLURM_NNODES}" --hostfile "${hostfile}" ./single_process.sh "${SLURM_NNODES}" "${master_node}"

mpirun -np "${SLURM_NNODES}" --hostfile "${hostfile}" ./single_process.sh "${SLURM_NNODES}" "${master_node}"