run_multi_onenode.sh 535 Bytes
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
#!/bin/bash
# Slurm batch job: runs driver.py through torch.distributed.run on one node,
# with one worker process per visible DCU (4 in total).
#
# Usage: sbatch run_multi_onenode.sh   (from the directory holding driver.py)
#
# NOTE(review): stdout/stderr are written under logs/ (see -o/-e below).
# Slurm is not expected to create that directory, so make sure it exists
# before submitting - confirm against the site's Slurm configuration.
#
# Resources requested: partition wzhdexclu03, 1 node, 32 tasks per node with
# 1 CPU each, 4 DCUs. The directives must stay ahead of the first command.
#SBATCH -p wzhdexclu03
#SBATCH -N 1
#SBATCH --cpus-per-task=1
#SBATCH --ntasks-per-node=32
#SBATCH --gres=dcu:4
#SBATCH -J onenode_4dcu
#SBATCH -o logs/pt-%j.out
#SBATCH -e logs/pt-%j.err

# Print a message to stderr and abort the job with a non-zero status.
fail() {
  printf 'run_multi_onenode: %s\n' "$*" >&2
  exit 1
}

# Make the `conda` shell function available in this non-interactive shell,
# then switch to the PyTorch environment. Abort instead of silently launching
# the training with whatever python3 happens to be on PATH.
source ~/miniconda3/etc/profile.d/conda.sh \
  || fail "cannot source ~/miniconda3/etc/profile.d/conda.sh"
conda activate torch1.10-dtk22.10-py38 \
  || fail "cannot activate conda env torch1.10-dtk22.10-py38"

# Alternative environment, kept disabled:
#conda activate base

# Start from a clean module set, then load the compiler, MPI and DTK stack.
# NOTE(review): the conda env name mentions dtk22.10 while the module loaded
# here is compiler/dtk/23.04 - confirm this pairing is intended.
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 compiler/dtk/23.04 \
  || fail "module load failed"
# Record the effective module set in the job log.
module list

# Devices exposed to the job. The worker count handed to the launcher is
# derived from this list so the two can never drift apart; keep the list
# length in line with the --gres=dcu:N request above.
dcu_list="0,1,2,3"
IFS=',' read -r -a dcu_ids <<< "${dcu_list}"
export HIP_VISIBLE_DEVICES="${dcu_list}"

# driver.py is the program to run; replace it with the actual entry point.
python3 -m torch.distributed.run --nproc_per_node "${#dcu_ids[@]}" driver.py