#!/bin/bash
#
# install.sh - build the deepspeed (and optionally apex) wheels and install
# them either on the local machine or on every host listed in a hostfile.

# Abort on the first failing command; the ERR trap below reports where.
set -e

# Print a failure report to stderr.
# Arguments: $1 - line number reported by the ERR trap
err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
}
trap 'err_report $LINENO' ERR

# Print the command-line help text to stdout.
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'

Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally

[optional]
    -d, --deepspeed_only    Install only deepspeed and no third party dependencies
    -t, --third_party_only  Install only third party dependencies and not deepspeed
    -l, --local_only        Installs only on local machine
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -h, --help              This help text

EOF
}

# Option defaults; the argument parsing that follows overrides them.
ds_only=0              # set to 1 when -d/--deepspeed_only is given
tp_only=0              # set to 1 when -t/--third_party_only is given
deepspeed_install=1    # 1 = build and install the deepspeed wheel
third_party_install=1  # 1 = build and install the third-party (apex) wheel
local_only=0           # 1 = install on this machine only
entire_dlts_job=1      # NOTE(review): not read anywhere in the visible script - confirm before removing
hostfile=/job/hostfile # hostfile path used unless -H/--hostfile is given
Jeff Rasley's avatar
Jeff Rasley committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

# Parse command-line options into the install-control globals.
# Globals written: deepspeed_install, third_party_install, ds_only, tp_only,
#                  local_only, hostfile
# Arguments: the script's positional parameters ("$@")
# Exits: 0 after printing help (-h/--help); 1 on an unknown option, on a
#        missing or non-existent --hostfile value, or when -d and -t are
#        both given.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--deepspeed_only)
                deepspeed_install=1
                third_party_install=0
                ds_only=1
                shift
                ;;
            -t|--third_party_only)
                deepspeed_install=0
                third_party_install=1
                tp_only=1
                shift
                ;;
            -l|--local_only)
                local_only=1
                shift
                ;;
            -H|--hostfile)
                # Require a value so the two-argument shift below cannot
                # run past the end of the argument list.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires a hostfile path"
                    usage
                    exit 1
                fi
                hostfile=$2
                if [[ ! -f "$hostfile" ]]; then
                    echo "User provided hostfile does not exist at $hostfile, exiting"
                    exit 1
                fi
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument(s)"
                usage
                exit 1
                ;;
        esac
    done

    # -d and -t each disable what the other one enables, so reject the pair.
    if [[ "$ds_only" == "1" && "$tp_only" == "1" ]]; then
        echo "-d and -t are mutually exclusive, only choose one or none"
        usage
        exit 1
    fi
}

parse_args "$@"

# Record the checked-out git revision and branch in deepspeed/version_info.py,
# then show the generated file.
echo "Updating git hash/branch info"
{
    printf "git_hash = '%s'\n" "$(git rev-parse --short HEAD)"
    printf "git_branch = '%s'\n" "$(git rev-parse --abbrev-ref HEAD)"
} > deepspeed/version_info.py
cat deepspeed/version_info.py

# Command line for a from-source apex install.
# NOTE(review): nothing in the visible script references this variable.
install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex'

89
90
# Fall back to a local-only install when the hostfile is missing.
# The path is quoted so an empty value or a path containing spaces is
# still tested as a single file name.
if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi

94
95
96
# Install the packages listed in requirements.txt on the local machine.
sudo -H pip install --requirement requirements.txt

97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Build wheels
if [[ "$third_party_install" == "1" ]]; then
    echo "Checking out sub-module(s)"
    git submodule update --init --recursive

    echo "Building apex wheel"
    # The subshell keeps the directory change local to the build, so the
    # script's working directory is never left inside third_party/apex.
    (
        cd third_party/apex || exit 1
        python setup.py --cpp_ext --cuda_ext bdist_wheel
    )
fi
if [[ "$deepspeed_install" == "1" ]]; then
    # This step only produces the wheel; it is installed later in the script.
    echo "Building deepspeed wheel"
    python setup.py bdist_wheel
fi
Jeff Rasley's avatar
Jeff Rasley committed
111
112


113
114
# Install the built wheels: on this machine only when local_only is 1,
# otherwise on every host named in the hostfile using pdsh/pdcp.
if [[ "$local_only" == "1" ]]; then
    if [[ "$third_party_install" == "1" ]]; then
        echo "Installing apex"
        sudo -H pip uninstall -y apex
        sudo -H pip install third_party/apex/dist/apex*.whl
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        sudo -H pip uninstall -y deepspeed
        sudo -H pip install dist/deepspeed*.whl
        # Import check that also prints the installed version information.
        python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
        echo "Installation is successful"
    fi
else
    if [[ ! -f "$hostfile" ]]; then
        echo "hostfile not found, cannot proceed"
        exit 1
    fi
    # Take the first column of every non-blank, non-comment line and join
    # the names with commas, the list format given to `pdsh -w`.
    hosts=$(awk 'NF && $1 !~ /^#/ {print $1}' "$hostfile" | paste -sd "," -)
    if [[ -z "$hosts" ]]; then
        echo "No hosts listed in $hostfile, cannot proceed"
        exit 1
    fi
    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Start from a staging directory with no wheels in it on every host;
    # rm -f keeps an existing directory without wheels from raising an error.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements.txt "${tmp_wheel_path}/"
    pdsh -w "$hosts" "sudo -H pip install -r ${tmp_wheel_path}/requirements.txt"

    if [[ "$third_party_install" == "1" ]]; then
        pdsh -w "$hosts" "sudo -H pip uninstall -y apex"
        pdcp -w "$hosts" third_party/apex/dist/apex*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/apex*.whl"
        # Import check on every host.
        pdsh -w "$hosts" 'python -c "import apex"'
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        pdsh -w "$hosts" "sudo -H pip uninstall -y deepspeed"
        pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/deepspeed*.whl"
        pdsh -w "$hosts" "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
        echo "Installation is successful"
    fi
    # Remove the staged files and the staging directory from every host.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl $tmp_wheel_path/requirements.txt; rmdir $tmp_wheel_path; fi"
fi