"vscode:/vscode.git/clone" did not exist on "06bcfbd6295b0aa0b4a63b6bd6731c0995f0802d"
run_pretrain.sh 453 Bytes
Newer Older
hepj987's avatar
hepj987 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/bin/bash
#
# Launch single_process_pretrain.sh on every node listed in ./hostfile via
# mpirun.
#
# Input:
#   ./hostfile   one node per line; only the first whitespace-separated field
#                of each non-blank line is used as the host name.
# Output:
#   hostfile-dl  written to the current directory: one "<host> slots=4" line
#                per unique host, sorted.
#   stdout       the first host listed in ./hostfile (it is also passed as the
#                only argument to single_process_pretrain.sh).
# Exit status:
#   1 if ./hostfile is unreadable or lists no hosts; otherwise mpirun's status.

#set -x
set -u

# Slots given to every host in the generated MPI hostfile.
# NOTE(review): presumably the number of accelerators per node - confirm.
readonly SLOTS_PER_NODE=4

hostfile=./hostfile
work_dir=$(pwd)
mpi_hostfile="${work_dir}/hostfile-dl"

if [[ ! -r "${hostfile}" ]]; then
  printf 'error: cannot read %s\n' "${hostfile}" >&2
  exit 1
fi

# Build the MPI hostfile from the unique host names. Reading line by line
# (instead of word-splitting the file) keeps extra fields on a line from
# becoming bogus hosts; `NF` skips blank lines so they are not counted as
# nodes. The redirection truncates any hostfile-dl left by a previous run.
num_node=0
while IFS= read -r node; do
  printf '%s slots=%d\n' "${node}" "${SLOTS_PER_NODE}"
  num_node=$((num_node + 1))
done < <(awk 'NF { print $1 }' "${hostfile}" | sort -u) > "${mpi_hostfile}"

if ((num_node == 0)); then
  printf 'error: %s does not list any hosts\n' "${hostfile}" >&2
  exit 1
fi

# Total number of processes to start: one per slot on every node.
num_gpu=$((num_node * SLOTS_PER_NODE))

# The first host in file order (not sorted order) is handed to the worker
# script. NOTE(review): presumably used as the rendezvous address - confirm
# against single_process_pretrain.sh.
dist_url=$(awk 'NF { print $1; exit }' "${hostfile}")
echo "${dist_url}"

mpirun -np "${num_gpu}" --hostfile "${mpi_hostfile}" --bind-to none \
  "${work_dir}/single_process_pretrain.sh" "${dist_url}"