setup_docker.sh 1.67 KB
Newer Older
Jeff Rasley's avatar
Jeff Rasley committed
1
2
3
4
5
6
7
#!/bin/bash

# Cluster settings are read from this JSON file, resolved relative to the
# current working directory. Nothing below can work without it, so stop
# immediately when it is absent.
azure_config=azure_config.json
[[ -f "${azure_config}" ]] || {
    echo "Cannot find ${azure_config}"
    exit 1
}
8
9
# Derive the Azure resource-group name from the "location" field of the
# config file. jq -r prints the raw string, so no quote stripping is needed.
location=$(jq -r '.location' "${azure_config}")
# jq prints the literal word "null" for a missing key and prints nothing at
# all when it fails; either way there is no usable location.
if [[ -z "${location}" || "${location}" == "null" ]]; then
    echo 'missing location in config' >&2
    exit 1
fi
rg="deepspeed_rg_${location}"
Jeff Rasley's avatar
Jeff Rasley committed
10

11
12
13
14
15
16
17
# Decide how the update is fanned out: through pdsh when it is on the PATH,
# otherwise node by node (with a hint on how to get pdsh).
if command -v pdsh; then
    parallel=true
else
    echo "Installing pdsh will allow for the docker pull to be done in parallel across the cluster. See: 'apt-get install pdsh'"
    parallel=false
fi

Jeff Rasley's avatar
Jeff Rasley committed
18
19
20
21
22
23
24
# Connection settings taken from the config file.
#   ssh_key  - path of the private key handed to ssh via -i
#   num_vms  - number of cluster nodes to update
# jq prints the literal word "null" for a missing key and prints nothing when
# it fails, so both cases are rejected. The tests are quoted so an empty value
# cannot turn them into a syntax error that silently skips the check.
ssh_key=$(jq -r '.ssh_private_key' "${azure_config}")
if [[ -z "${ssh_key}" || "${ssh_key}" == "null" ]]; then
    echo 'missing ssh_private_key in config' >&2
    exit 1
fi

num_vms=$(jq -r '.num_vms' "${azure_config}")
if [[ -z "${num_vms}" || "${num_vms}" == "null" ]]; then
    echo 'missing num_vms in config' >&2
    exit 1
fi
# num_vms is later used in arithmetic and as a loop bound, so it has to be a
# plain non-negative integer.
if ! [[ "${num_vms}" =~ ^[0-9]+$ ]]; then
    echo "num_vms in config must be a non-negative integer, got '${num_vms}'" >&2
    exit 1
fi

# Kept as a single space-separated string: it is exported verbatim through
# PDSH_SSH_ARGS_APPEND and word-split where it is passed to ssh.
# Host-key checking is switched off and no known_hosts file is written.
args="-i ${ssh_key} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
username=deepspeed
25
26
27
28
29
30
31
32
33
34
35
36
37
38

# Command sequence executed on each node, kept as one newline-separated
# string: pull the DeepSpeed image, symlink attach.sh, update the DeepSpeed
# checkout and its submodules, then run azure/start_container.sh.
update_script=$'\n'
update_script+=$'docker pull deepspeed/deepspeed:latest;\n'
update_script+=$'ln -s workdir/DeepSpeed/azure/attach.sh attach.sh;\n'
update_script+=$'cd workdir/DeepSpeed;\n'
update_script+=$'git pull;\n'
update_script+=$'git submodule update --init --recursive;\n'
update_script+=$'bash azure/start_container.sh;\n'

# Run ${update_script} on the cluster nodes: in one pdsh invocation when
# ${parallel} is true, otherwise through one ssh session per node.
#
# Reads:  rg, num_vms, args, username, update_script, parallel
# Needs:  az, jq, and pdsh or ssh
# Exits:  1 when the VM listing fails or yields no public IP address

# Ask Azure once instead of once per node; the listing does not change
# between iterations.
if ! vm_listing=$(az vm list-ip-addresses -g "${rg}"); then
    echo "failed to list IP addresses for resource group ${rg}" >&2
    exit 1
fi

# First public IP of each of the first ${num_vms} listed VMs. Both branches
# below honour num_vms. Entries without a public IP are dropped ("// empty")
# instead of becoming a host literally named "null".
node_ips=()
while IFS= read -r ip_addr; do
    node_ips+=("${ip_addr}")
done < <(
    jq -r --argjson n "${num_vms}" \
        '.[:$n][] | .virtualMachine.network.publicIpAddresses[0].ipAddress // empty' \
        <<< "${vm_listing}"
)

if (( ${#node_ips[@]} == 0 )); then
    echo "no public IP addresses found in resource group ${rg}" >&2
    exit 1
fi
if [[ "${#node_ips[@]}" != "${num_vms}" ]]; then
    echo "warning: expected ${num_vms} nodes but found ${#node_ips[@]} public IP addresses" >&2
fi

# Split the option string and the remote script into words explicitly. This
# gives the same argument list as an unquoted expansion, without pathname
# expansion. read returns non-zero at end of input when -d '' is used, which
# is expected here.
read -r -a ssh_opts <<< "${args}"
read -r -d '' -a remote_cmd <<< "${update_script}" || true

if [[ "${parallel}" == true ]]; then
    echo "parallel docker pull"
    # Comma-separated host list for pdsh -w, without a trailing comma.
    hosts=$(IFS=,; echo "${node_ips[*]}")
    PDSH_RCMD_TYPE=ssh PDSH_SSH_ARGS_APPEND="${args}" \
        pdsh -w "${hosts}" -l "${username}" "${remote_cmd[@]}"
else
    echo "sequential docker pull"
    for ip_addr in "${node_ips[@]}"; do
        ssh "${ssh_opts[@]}" "${username}@${ip_addr}" "${remote_cmd[@]}"
    done
fi