install.sh 4.48 KB
Newer Older
Jeff Rasley's avatar
Jeff Rasley committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#!/bin/bash

# Exit as soon as a command fails (bash errexit).
set -o errexit

# err_report LINE
#   Prints the line number it was given, followed by a generic failure notice.
#   Arguments: $1 - line number (supplied by the ERR trap below).
#   Outputs:   two lines on stdout.
err_report() {
    printf 'Error on line %s\n' "$1"
    printf '%s\n' "Fail to install deepspeed"
}

# Call err_report with the current line number whenever the ERR trap fires.
trap 'err_report "$LINENO"' ERR

# usage
#   Prints the command-line help text to stdout.
#   Takes no arguments.
usage() {
  # Quoted heredoc delimiter: the text is emitted literally, with no expansion.
  cat <<'EOF'

Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally.

[optional]
    -d, --deepspeed_only    Install only deepspeed and no third party dependencies
    -t, --third_party_only  Install only third party dependencies and not deepspeed
    -l, --local_only        Installs only on local machine
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -h, --help              This help text

EOF
}

# Default settings; the option-parsing loop further down overrides them.

# Which components to build and install (1 = yes, 0 = no).
deepspeed_install=1
third_party_install=1

# Set to 1 by -d / -t respectively; used to reject the two flags together.
ds_only=0
tp_only=0

# 1 = install on this machine only.
local_only=0

# Not read anywhere else in the visible part of this script.
entire_dlts_job=1

# Hostfile consulted when -H / --hostfile is not given.
hostfile="/job/hostfile"
Jeff Rasley's avatar
Jeff Rasley committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

# parse_args ARGS...
#   Parses the installer's command-line options and records the result in the
#   script-level variables deepspeed_install, third_party_install, ds_only,
#   tp_only, local_only and hostfile.
#   Arguments: the options to parse (normally the script's own "$@").
#   Exits:     0 after printing the help text (-h / --help);
#              1 on an unknown option, on -H / --hostfile without a value, or
#              when the given hostfile is not an existing regular file.
parse_args() {
    local key
    while [[ $# -gt 0 ]]; do
        key="$1"
        case "$key" in
            -d|--deepspeed_only)
                deepspeed_install=1
                third_party_install=0
                ds_only=1
                shift
                ;;
            -t|--third_party_only)
                deepspeed_install=0
                third_party_install=1
                tp_only=1
                shift
                ;;
            -l|--local_only)
                local_only=1
                shift
                ;;
            -H|--hostfile)
                # A trailing -H has no value to consume; without this guard the
                # existence test below would be skipped and "shift 2" would fail.
                if [[ $# -lt 2 ]]; then
                    echo "Option $key requires a hostfile path, exiting"
                    exit 1
                fi
                hostfile="$2"
                # Quoted so that empty paths and paths with spaces are tested
                # as a single file name.
                if [[ ! -f "$hostfile" ]]; then
                    echo "User provided hostfile does not exist at $hostfile, exiting"
                    exit 1
                fi
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument(s): $key"
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# -d and -t cannot be combined: show the help text and stop if both were given.
if [[ "$ds_only" == "1" && "$tp_only" == "1" ]]; then
    printf '%s\n' "-d and -t are mutually exclusive, only choose one or none"
    usage
    exit 1
fi

# Write the current short commit hash and branch name as two Python
# assignments into deepspeed/git_version_info.py, then print the file.
printf '%s\n' "Updating git hash/branch info"
version_file="deepspeed/git_version_info.py"
# First write truncates the file, second one appends.
printf "git_hash = '%s'\n" "$(git rev-parse --short HEAD)" > "$version_file"
printf "git_branch = '%s'\n" "$(git rev-parse --abbrev-ref HEAD)" >> "$version_file"
cat "$version_file"
Jeff Rasley's avatar
Jeff Rasley committed
86
87
88

# pip command line that installs apex from the third_party/apex source tree.
# It is only stored here; nothing in the visible part of this script runs it.
install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex'

# Fall back to a local-only install when the hostfile is missing. The path is
# quoted so that an empty value, or one containing spaces, is still tested as
# a single file name; unquoted, an empty value turns the test into "[ ! -f ]",
# which is false and would skip this fallback.
if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi

94
95
96
# Install this project's Python requirements on the local machine first.
sudo -H pip install -r requirements.txt

# Build the wheels selected by third_party_install / deepspeed_install.
if [[ "$third_party_install" == "1" ]]; then
    printf '%s\n' "Checking out sub-module(s)"
    git submodule update --init --recursive

    printf '%s\n' "Building apex wheel"
    cd third_party/apex
    python setup.py --cpp_ext --cuda_ext bdist_wheel
    # Return to the previous directory ("cd -" also prints it).
    cd -

    # Replace any installed apex with the wheel that was just built.
    printf '%s\n' "Installing apex locally so that deepspeed will build"
    sudo -H pip uninstall -y apex
    sudo -H pip install third_party/apex/dist/apex*.whl
fi

if [[ "$deepspeed_install" == "1" ]]; then
    printf '%s\n' "Building deepspeed wheel"
    python setup.py bdist_wheel
fi
Jeff Rasley's avatar
Jeff Rasley committed
115

116
# Install the wheels built earlier in this script: on this machine only when
# local_only is 1, otherwise on every host named in the hostfile, using pdcp
# to copy files and pdsh to run commands.
if [[ "$local_only" == "1" ]]; then
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        sudo -H pip uninstall -y deepspeed
        sudo -H pip install dist/deepspeed*.whl
        # Import check: prints the installed version, branch and commit.
        python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
        echo "Installation is successful"
    fi
else
    # Not referenced again in the visible part of this script.
    local_path=$(pwd)

    # Quoted so that an empty or space-containing path is tested as one name.
    if [[ ! -f "$hostfile" ]]; then
        echo "hostfile not found, cannot proceed"
        exit 1
    fi

    # First field of every non-blank hostfile line, joined with commas.
    # Blank lines are skipped so they do not become empty host names.
    hosts=$(awk 'NF { print $1 }' "$hostfile" | paste -sd "," -)
    if [[ -z "$hosts" ]]; then
        # An empty list would make "pdsh -w" take the command as its host list.
        echo "hostfile $hostfile lists no hosts, cannot proceed"
        exit 1
    fi

    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # Prepare the staging directory on every host. "rm -f" keeps an existing
    # directory without any wheel files from being reported as an error.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
    pdcp -w "$hosts" requirements.txt "${tmp_wheel_path}/"
    pdsh -w "$hosts" "sudo -H pip install -r ${tmp_wheel_path}/requirements.txt"

    if [[ "$third_party_install" == "1" ]]; then
        pdsh -w "$hosts" "sudo -H pip uninstall -y apex"
        # The wheel glob is expanded locally, so it is left unquoted on purpose.
        pdcp -w "$hosts" third_party/apex/dist/apex*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/apex*.whl"
        pdsh -w "$hosts" 'python -c "import apex"'
    fi

    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        pdsh -w "$hosts" "sudo -H pip uninstall -y deepspeed"
        pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/deepspeed*.whl"
        pdsh -w "$hosts" "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
        echo "Installation is successful"
    fi

    # Remove the staged files and the staging directory on every host.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl $tmp_wheel_path/requirements.txt; rmdir $tmp_wheel_path; fi"
fi