#!/bin/bash

# Abort on the first failing command. -E (errtrace) makes the ERR trap below
# apply inside shell functions and subshells too, so a failure in a helper
# function is reported instead of exiting silently.
set -eE

# Report a failed installation on stderr.
# Arguments: $1 - line number of the command that failed
err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
}
trap 'err_report $LINENO' ERR

# Print the command-line help text to stdout.
# The here-doc delimiter is quoted so the text is emitted literally.
usage() {
    cat <<'EOF'

Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally

[optional]
    -l, --local_only        Install only on local machine
    -s, --pip_sudo          Run pip install with sudo (default: no sudo)
    -r, --allow_sudo        Allow script to be run by root (probably don't want this, instead use --pip_sudo)
    -n, --no_clean          Do not clean prior build state, by default prior build files are removed before building wheels
    -m, --pip_mirror        Use the specified pip mirror (default: the default pip mirror)
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -v, --verbose           Verbose logging
    -h, --help              This help text

EOF
}

# Default settings. The option-parsing loop that follows may override some of
# them; the rest keep these values for the whole run.
ds_only=0                 # read by the "-d and -t are mutually exclusive" check
tp_only=0                 # read by the "-d and -t are mutually exclusive" check
deepspeed_install=1       # NOTE(review): not read anywhere in the visible script
third_party_install=1     # NOTE(review): not read anywhere in the visible script
local_only=0              # 1 => install on this machine only, skip the hostfile
pip_sudo=0                # 1 => prefix pip commands with "sudo -H"
entire_dlts_job=1         # NOTE(review): not read anywhere in the visible script
hostfile=/job/hostfile    # list of target hosts for a multi-node install
pip_mirror=""             # non-empty => passed to pip install via -i
apex_commit=""            # NOTE(review): not read anywhere in the visible script
skip_requirements=0       # NOTE(review): not read anywhere in the visible script
allow_sudo=0              # 1 => permit running the script as root
no_clean=0                # 1 => keep build artifacts from a previous run
verbose=0                 # 1 => pass -v to rm, pip and setup.py

# Parse command-line options into the global settings variables
# (local_only, pip_sudo, pip_mirror, verbose, allow_sudo, no_clean, hostfile).
# Arguments: the script's command-line arguments
# Exits:     0 after printing help for -h/--help;
#            1 on an unknown option, an option missing its value, or a
#              user-supplied hostfile that does not exist.
parse_args() {
    local key
    while [[ $# -gt 0 ]]; do
        key="$1"
        case "$key" in
            -l|--local_only)
                # Documented in the help text; install on this machine only.
                local_only=1
                shift
                ;;
            -s|--pip_sudo)
                pip_sudo=1
                shift
                ;;
            -m|--pip_mirror)
                # Fail with a clear message instead of letting 'shift' error out.
                if [[ $# -lt 2 ]]; then
                    echo "Option $key requires an argument"
                    usage
                    exit 1
                fi
                pip_mirror="$2"
                shift 2
                ;;
            -v|--verbose)
                verbose=1
                shift
                ;;
            -r|--allow_sudo)
                allow_sudo=1
                shift
                ;;
            -n|--no_clean)
                no_clean=1
                shift
                ;;
            -H|--hostfile)
                if [[ $# -lt 2 ]]; then
                    echo "Option $key requires an argument"
                    usage
                    exit 1
                fi
                hostfile="$2"
                # An explicitly requested hostfile must exist; only the default
                # path is allowed to be absent.
                if [ ! -f "$hostfile" ]; then
                    echo "User provided hostfile does not exist at $hostfile, exiting"
                    exit 1
                fi
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument: $key"
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Refuse to run as root unless it was explicitly allowed (allow_sudo=1, set by
# -r/--allow_sudo); the warning points users at -s/--pip_sudo instead.
user=$(whoami)
if [ "$allow_sudo" == "0" ] && [ "$user" == "root" ]; then
    echo "WARNING: running as root, if you want to install DeepSpeed with sudo please use -s/--pip_sudo instead"
    usage
    exit 1
fi

# Reject the combination ds_only=1 and tp_only=1.
# NOTE(review): both flags default to 0 and no option visible in this script
# sets them, so this guard looks unreachable here - confirm before removing.
if [ "$ds_only" == "1" ] && [ "$tp_only" == "1" ]; then
    echo "-d and -t are mutually exclusive, only choose one or none"
    usage
    exit 1
fi

# Translate the verbose setting into the flag handed to rm, pip and setup.py.
# VERBOSE is left empty (not unset) when verbose logging is off.
VERBOSE=""
if [ "$verbose" == "1" ]; then
    VERBOSE="-v"
fi

# Remove a file or directory if it exists; do nothing otherwise.
# Globals:   VERBOSE (read) - passed to rm when non-empty
# Arguments: $1 - path to remove
# Outputs:   a progress line on stdout, plus rm's own output when verbose
rm_if_exist() {
    echo "Attempting to remove $1"
    # The path is quoted so names containing spaces or glob characters work;
    # '--' keeps a path starting with '-' from being read as an option.
    if [ -f "$1" ]; then
        rm ${VERBOSE:+"$VERBOSE"} -- "$1"
    elif [ -d "$1" ]; then
        rm -r ${VERBOSE:+"$VERBOSE"} -- "$1"
    fi
}

# Unless cleaning was disabled (no_clean=1), delete artifacts left by a
# previous build. Paths are relative to the current working directory.
if [ "$no_clean" == "0" ]; then
    # remove deepspeed build files
    rm_if_exist deepspeed/git_version_info_installed.py
    rm_if_exist dist
    rm_if_exist build
    rm_if_exist deepspeed.egg-info
fi

# Command prefix for pip invocations: "sudo -H" when pip_sudo=1, otherwise
# empty. Kept as a plain string because it is expanded unquoted later and is
# also embedded in remote command lines.
PIP_SUDO=""
if [ "$pip_sudo" == "1" ]; then
    PIP_SUDO="sudo -H"
fi

# Base "pip install" command line: includes the verbose flag and, when a
# mirror was requested, "-i <mirror>".
PIP_INSTALL="pip install $VERBOSE"
if [ "$pip_mirror" != "" ]; then
    PIP_INSTALL="$PIP_INSTALL -i $pip_mirror"
fi

# Fall back to a local-only install when the hostfile is missing.
# "$hostfile" is quoted so an empty value is treated as "no hostfile" rather
# than collapsing the test to a single always-true argument.
if [ ! -f "$hostfile" ]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi

# Build the wheel with setup.py; the install steps below pick it up from
# dist/. $VERBOSE is deliberately unquoted so that an empty value adds no
# argument at all.
echo "Building deepspeed wheel"
python setup.py $VERBOSE bdist_wheel

# Install the freshly built wheel, either on this machine only or on every
# host listed in the hostfile (using pdsh to run commands and pdcp to copy).
# $PIP_SUDO and $PIP_INSTALL are deliberately expanded unquoted: they hold
# whole command prefixes that must be split into words.
if [ "$local_only" == "1" ]; then
    echo "Installing deepspeed"
    $PIP_SUDO pip uninstall -y deepspeed
    $PIP_SUDO $PIP_INSTALL dist/deepspeed*.whl
    ds_report
else
    # NOTE(review): local_path is not read later in the visible script.
    local_path=$(pwd)
    if [ -f "$hostfile" ]; then
        # First column of every non-blank, non-comment line, joined with
        # commas into the host list handed to pdsh/pdcp via -w.
        hosts=$(awk 'NF && $1 !~ /^#/ {print $1}' "$hostfile" | paste -sd "," -)
    else
        echo "hostfile not found, cannot proceed"
        exit 1
    fi
    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Prepare the staging directory on every host: drop stale wheels if it
    # already exists (rm -f so an empty directory is not an error), otherwise
    # create it.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements/requirements.txt ${tmp_wheel_path}/

    echo "Installing deepspeed"
    pdsh -w "$hosts" "$PIP_SUDO pip uninstall -y deepspeed"
    pdcp -w "$hosts" dist/deepspeed*.whl $tmp_wheel_path/
    pdsh -w "$hosts" "$PIP_SUDO $PIP_INSTALL $tmp_wheel_path/deepspeed*.whl"
    pdsh -w "$hosts" "ds_report"
    # Remove the whole staging directory. It also holds the copied
    # requirements.txt, so deleting only *.whl followed by rmdir could never
    # succeed.
    pdsh -w "$hosts" "rm -rf $tmp_wheel_path"
fi