run_bert_squad_4dcus.sh 435 Bytes
Newer Older
yangzhong's avatar
yangzhong committed
1
2
3
4
5
6
7
8
9
10
11
12
13
#!/usr/bin/env bash
#
# Launch ${WORK_PATH}/2nodes_single_process.sh under mpirun.
#
# Inputs (all resolved relative to the current working directory):
#   env.sh                     - sourced into this shell before launching
#   ./hostfile                 - one entry per line; the number of distinct
#                                non-blank lines is taken as the node count and
#                                the first field of the first non-blank line is
#                                passed to the launched script
#   hostfile-${SLURM_JOB_ID}   - hostfile handed to mpirun
#
# The exit status of the script is the exit status of mpirun.
#
# Strict mode (set -euo pipefail) is intentionally not enabled: env.sh is
# sourced into this shell and its contents are not visible here, so errors are
# checked explicitly instead.

# Print a message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

HOME_PATH=/public/home/hepj
WORK_PATH=${HOME_PATH}/bert-pytorch/2node-run-squad

# Diagnostic only: show which python3 is on PATH before env.sh is sourced.
command -v python3 || printf 'warning: python3 not found on PATH\n' >&2

source env.sh || printf 'warning: sourcing env.sh returned non-zero status\n' >&2

hostfile=./hostfile
[[ -r "$hostfile" ]] || die "hostfile '$hostfile' is missing or not readable"

# Ranks started per distinct hostfile entry.
procs_per_node=8

# Count distinct non-blank lines; blank lines would otherwise be counted as
# one extra "node".
node_count=$(awk 'NF && !seen[$0]++ { n++ } END { print n + 0 }' "$hostfile") \
  || die "failed to read '$hostfile'"
(( node_count > 0 )) || die "hostfile '$hostfile' contains no entries"
np=$(( node_count * procs_per_node ))

# First field of the first non-blank hostfile line.
dist_url=$(awk 'NF { print $1; exit }' "$hostfile")
[[ -n "$dist_url" ]] || die "could not determine first host from '$hostfile'"

# NOTE(review): the rank count is derived from ./hostfile, but mpirun is given
# hostfile-${SLURM_JOB_ID}; presumably both describe the same nodes - confirm.
# NOTE(review): btl_tcp_if_include is passed the first host name; Open MPI
# documents this parameter as a list of interface names or CIDR subnets -
# confirm this value is what is intended.
mpirun -np "${np}" \
  --hostfile "hostfile-${SLURM_JOB_ID}" \
  --bind-to none \
  --mca btl_tcp_if_include "${dist_url}" \
  "${WORK_PATH}/2nodes_single_process.sh" "${dist_url}"