run_en_base_dp.slurm 639 Bytes
Newer Older
yongshk's avatar
add new  
yongshk committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/bin/bash
#SBATCH --job-name=transformer
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --partition=kshdnormal01
#SBATCH --time=96:00:00
#SBATCH --gres=dcu:4
#SBATCH --exclusive

# Slurm batch job that activates the "pyth" environment and runs the
# enwik8 base training script in "train" mode.
#
# Usage:   sbatch run_en_base_dp.slurm
# Exit:    status of the training script, or 1 if a pre-flight check fails.
#
# NOTE: the #SBATCH directives above must stay directly after the shebang;
# sbatch stops reading them at the first executable command.
# NOTE(review): the job requests 8 tasks per node but only 4 "dcu" generic
# resources — confirm this ratio is intended.

# Path of the training launcher and the mode passed to it.
readonly TRAIN_SCRIPT=/pytorch/run_enwik8_base_dp.sh
readonly TRAIN_MODE=train

# Print an error message to stderr and abort the job with a failure status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Load the environment. Checked explicitly (rather than via `set -eu`) because
# the sourced activation script is third-party code that is not guaranteed to
# be clean under strict mode; without this check a failed activation would
# silently run training with whatever interpreter happens to be on PATH.
# shellcheck disable=SC1091 — "activate" is resolved via PATH at run time.
source activate pyth || die "could not activate environment 'pyth'"

# Record the interpreter version in the job log; also proves python resolves.
python --version || die "python is not available after activating 'pyth'"

# Fail fast, before any work starts, if the launcher is missing.
[[ -f "${TRAIN_SCRIPT}" ]] || die "training script not found: ${TRAIN_SCRIPT}"

# NOTE(review): presumably forces synchronous HIP kernel launches (a debugging
# aid that usually costs throughput) — confirm it is still wanted for a
# 96-hour production run.
export HIP_LAUNCH_BLOCKING=1

# Optional knobs, currently disabled (kept for reference):
# export ROCBLAS_LAYER=3
# export ROCBLAS_COMPUTETYPE_FP16R=1
# export MIOPEN_DEBUG_DISABLE_FIND_DB=1
# export NCCL_SOCKET_IFNAME=eno1
# export HSA_USERPTR_FOR_PAGED_MEM=0
#
# Alternative launcher prefix, currently disabled:
# mpirun --allow-run-as-root -np 4 -H localhost:4 \

# Run training. This is the last command, so its exit status becomes the
# exit status of the job.
bash "${TRAIN_SCRIPT}" "${TRAIN_MODE}"