#!/bin/bash
#SBATCH --job-name=transformer
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --partition=kshdnormal01
#SBATCH --time=96:00:00
#SBATCH --gres=dcu:4
#SBATCH --exclusive
#
# Slurm batch script for the "transformer" job.
#
# The directives above request: 1 node, 8 tasks per node, 4 "dcu" generic
# resources, exclusive node access, and a 96-hour time limit on partition
# kshdnormal01. They must stay directly below the shebang, one per line,
# and ahead of the first executable command, otherwise sbatch does not
# treat them as directives.

# Load the environment named "pyth".
source activate pyth

# Log which Python interpreter the job is using.
python --version

# Active runtime setting. Per the HIP debugging documentation this makes
# kernel launches blocking; presumably enabled here for debugging.
export HIP_LAUNCH_BLOCKING=1

# Disabled settings and launch commands, kept for reference.
# export ROCBLAS_LAYER=3
# export ROCBLAS_COMPUTETYPE_FP16R=1
# mpirun --allow-run-as-root -np 4 -H localhost:4 \
# export MIOPEN_DEBUG_DISABLE_FIND_DB=1
# export NCCL_SOCKET_IFNAME=eno1
# export HSA_USERPTR_FOR_PAGED_MEM=0
# export HIP_LAUNCH_BLOCKING=1

# NOTE(review): this file was recovered from a copy with its line breaks
# removed. The command below has no line-continuation backslash, so it is
# kept as a single commented-out mpirun invocation. If the
# "bash /pytorch/run_enwik8_base_dp.sh train" part was originally a live
# command on its own line, move it out of the comment -- TODO confirm.
#mpirun --allow-run-as-root -np 4 -H localhost:4 bash /pytorch/run_enwik8_base_dp.sh train