#!/bin/bash
#
# Builds and installs DeepSpeed and its third party dependency (apex), either
# on the local machine only or on every host listed in a hostfile.
# Run `install.sh --help` for the supported options.

# Abort on the first command that fails; the ERR trap below reports where.
set -e

#######################################
# Report a failed installation.
# Arguments: $1 - line number of the command that failed
# Outputs:   two diagnostic lines on stderr
#######################################
err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
}
trap 'err_report $LINENO' ERR

#######################################
# Print the command-line help text.
# Outputs: usage text on stdout
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'

Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally

[optional]
    -d, --deepspeed_only    Install only deepspeed and no third party dependencies
    -t, --third_party_only  Install only third party dependencies and not deepspeed
    -l, --local_only        Installs only on local machine
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -h, --help              This help text

EOF
}

# Option defaults; the command-line parsing that follows overrides them.
ds_only=0               # 1 when -d/--deepspeed_only was given
tp_only=0               # 1 when -t/--third_party_only was given
deepspeed_install=1     # 1 => build and install the deepspeed wheel
third_party_install=1   # 1 => build and install the third party wheel (apex)
local_only=0            # 1 => install on this machine only
entire_dlts_job=1       # NOTE(review): not read anywhere in the visible script
hostfile=/job/hostfile  # default hostfile path, overridden by -H/--hostfile

#######################################
# Parse command-line options into the script's option variables.
# Globals:   deepspeed_install, third_party_install, ds_only, tp_only,
#            local_only, hostfile (written)
# Arguments: the script's command-line arguments
# Outputs:   error text on stderr, plus the usage text for bad input
# Returns:   exits 0 after --help; exits 1 on an unknown option or a
#            missing / nonexistent hostfile
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--deepspeed_only)
                deepspeed_install=1
                third_party_install=0
                ds_only=1
                shift
                ;;
            -t|--third_party_only)
                deepspeed_install=0
                third_party_install=1
                tp_only=1
                shift
                ;;
            -l|--local_only)
                local_only=1
                shift
                ;;
            -H|--hostfile)
                # A trailing -H has no value to consume: without this check
                # hostfile would become empty and the two-argument shift
                # would fail.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires a path argument" >&2
                    usage
                    exit 1
                fi
                hostfile=$2
                if [[ ! -f "$hostfile" ]]; then
                    echo "User provided hostfile does not exist at $hostfile, exiting" >&2
                    exit 1
                fi
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument: $1" >&2
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# The deepspeed-only and third-party-only switches cannot be combined:
# report the conflict, show the help text and stop.
if [[ "$ds_only" == "1" && "$tp_only" == "1" ]]; then
    echo "-d and -t are mutually exclusive, only choose one or none"
    usage
    exit 1
fi

echo "Updating git hash/branch info"
# Regenerate the Python module that records the commit and branch being
# installed, then echo it so the values show up in the install log.
# A failing `git rev-parse` leaves the corresponding value empty rather than
# aborting, because the command substitution's status is not checked.
git_info_file=deepspeed/git_version_info.py
{
    echo "git_hash = '$(git rev-parse --short HEAD)'"
    echo "git_branch = '$(git rev-parse --abbrev-ref HEAD)'"
} > "$git_info_file"
cat "$git_info_file"

# pip command line that builds and installs apex directly from the source tree.
# NOTE(review): nothing in the visible script reads this variable; the wheel
# built under third_party/apex/dist is installed instead.
install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex'
# Fall back to a local-only install when there is no hostfile to read hosts
# from. The expansion is quoted on purpose: unquoted, an empty $hostfile turns
# the test into `[ ! -f ]`, which is always false and skips the fallback.
if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi
# Ensure dependencies are installed locally: the packages listed in
# requirements.txt are installed with sudo before any wheel is built.
sudo -H pip install -r requirements.txt
# Build wheels
if [[ "$third_party_install" == "1" ]]; then
    echo "Checking out sub-module(s)"
    git submodule update --init --recursive

    echo "Building apex wheel"
    # Build inside a subshell so the script's own working directory is never
    # changed, even when the build fails part-way through.
    (
        cd third_party/apex || exit 1
        python setup.py --cpp_ext --cuda_ext bdist_wheel
    )

    echo "Installing apex locally so that deepspeed will build"
    sudo -H pip uninstall -y apex
    sudo -H pip install third_party/apex/dist/apex*.whl
fi
# This step only builds the deepspeed wheel; the local / multi-host install
# step further down installs it from dist/deepspeed*.whl.
if [[ "$deepspeed_install" == "1" ]]; then
    echo "Building deepspeed wheel"
    python setup.py bdist_wheel
fi
# Install the wheels built above, either on this machine only or on every host
# named in the hostfile (wheels are copied with pdcp, commands run with pdsh).
if [[ "$local_only" == "1" ]]; then
    if [[ "$third_party_install" == "1" ]]; then
        echo "Installing apex locally"
        sudo -H pip uninstall -y apex
        sudo -H pip install third_party/apex/dist/apex*.whl
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        sudo -H pip uninstall -y deepspeed
        sudo -H pip install dist/deepspeed*.whl
        # Import check: a broken install makes python fail and aborts the script.
        python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
        echo "Installation is successful"
    fi
else
    if [[ ! -f "$hostfile" ]]; then
        echo "hostfile not found, cannot proceed" >&2
        exit 1
    fi
    # First column of every non-blank, non-comment line, joined with commas
    # into the host list format accepted by `pdsh -w`.
    hosts=$(awk 'NF && $1 !~ /^#/ {print $1}' "$hostfile" | paste -sd "," -)
    if [[ -z "$hosts" ]]; then
        echo "No hosts found in $hostfile, cannot proceed" >&2
        exit 1
    fi
    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Start from a staging directory without stale wheels on every host.
    # rm -f keeps an existing directory that holds no wheels from being an error.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements.txt "${tmp_wheel_path}/"
    pdsh -w "$hosts" "sudo -H pip install -r ${tmp_wheel_path}/requirements.txt"
    if [[ "$third_party_install" == "1" ]]; then
        pdsh -w "$hosts" "sudo -H pip uninstall -y apex"
        pdcp -w "$hosts" third_party/apex/dist/apex*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/apex*.whl"
        pdsh -w "$hosts" 'python -c "import apex"'
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        pdsh -w "$hosts" "sudo -H pip uninstall -y deepspeed"
        pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/deepspeed*.whl"
        pdsh -w "$hosts" "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
        echo "Installation is successful"
    fi
    # Remove the staged files and the staging directory again on every host.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl $tmp_wheel_path/requirements.txt; rmdir $tmp_wheel_path; fi"
fi