#!/bin/bash

# Abort the script as soon as any unguarded command fails.
set -e

# Print a failure report for the install.
# Arguments: $1 - line number of the command that failed
# Outputs:   two diagnostic lines on stderr, so they do not mix with normal
#            progress output on stdout
err_report() {
    echo "Error on line $1" >&2
    echo "Fail to install deepspeed" >&2
}
# Run err_report with the failing line number whenever a command returns
# non-zero; set -e then terminates the script.
trap 'err_report $LINENO' ERR

# Print the command-line help text to stdout.
# Takes no arguments. The heredoc delimiter is quoted so the text is emitted
# literally, with no variable or command expansion.
usage() {
  cat <<'EOF'

Usage: install.sh [options...]

By default will install deepspeed and all third party dependencies across all machines listed in
hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install locally

[optional]
    -d, --deepspeed_only    Install only deepspeed and no third party dependencies
    -t, --third_party_only  Install only third party dependencies and not deepspeed
    -l, --local_only        Installs only on local machine
    -H, --hostfile          Path to MPI-style hostfile (default: /job/hostfile)
    -h, --help              This help text

EOF
}

# Option defaults; the command-line parser below overrides them.
# ds_only / tp_only record whether -d / -t was given, so that asking for both
# can be rejected after parsing.
ds_only=0
tp_only=0
# Which halves of the install to perform (both by default).
deepspeed_install=1
third_party_install=1
# 1 = install on this machine only; 0 = install on every host in $hostfile.
local_only=0
# NOTE(review): not referenced anywhere else in the visible script.
entire_dlts_job=1
# MPI-style hostfile listing the machines to install on.
hostfile=/job/hostfile

# Parse the command-line options into the global install settings.
# Globals written: deepspeed_install, third_party_install, ds_only, tp_only,
#                  local_only, hostfile
# Arguments:       the script's command-line arguments ("$@")
# Exits:           0 after printing help (-h/--help);
#                  1 on an unknown option, a -H/--hostfile without a value,
#                  or a hostfile path that is not an existing regular file
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -d|--deepspeed_only)
                deepspeed_install=1
                third_party_install=0
                ds_only=1
                shift
                ;;
            -t|--third_party_only)
                deepspeed_install=0
                third_party_install=1
                tp_only=1
                shift
                ;;
            -l|--local_only)
                local_only=1
                shift
                ;;
            -H|--hostfile)
                # Without this guard a trailing -H would run into a failing
                # second `shift` instead of producing a clear message.
                if [[ $# -lt 2 ]]; then
                    echo "Option $1 requires a hostfile path" >&2
                    usage
                    exit 1
                fi
                hostfile=$2
                # Quoted so empty paths and paths with spaces are tested as given.
                if [[ ! -f "$hostfile" ]]; then
                    echo "User provided hostfile does not exist at $hostfile, exiting" >&2
                    exit 1
                fi
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown argument(s): $1" >&2
                usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# -d and -t each switch off the other half of the install, so requesting both
# leaves nothing to do; reject that combination with the help text.
case "${ds_only},${tp_only}" in
    1,1)
        echo "-d and -t are mutually exclusive, only choose one or none"
        usage
        exit 1
        ;;
esac

# Regenerate deepspeed/version_info.py with the current commit's short hash
# and branch name, then show the resulting file.
echo "Updating git hash/branch info"
{
    printf "git_hash = '%s'\n" "$(git rev-parse --short HEAD)"
    printf "git_branch = '%s'\n" "$(git rev-parse --abbrev-ref HEAD)"
} > deepspeed/version_info.py
cat deepspeed/version_info.py

# Command string for installing apex from its source tree.
# NOTE(review): not referenced anywhere else in the visible script.
install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex'

# Without a hostfile there is no list of remote machines, so fall back to a
# local-only install. The path is quoted so that an empty or space-containing
# value is tested as given instead of collapsing the test expression.
if [[ ! -f "$hostfile" ]]; then
    echo "No hostfile exists at $hostfile, installing locally"
    local_only=1
fi
# Build wheels
# Wheels are built once here and installed (locally or on every host) later.
if [[ "$third_party_install" == "1" ]]; then
    echo "Checking out sub-module(s)"
    git submodule update --init --recursive

    echo "Building apex wheel"
    # The subshell keeps the directory change local to the apex build, so the
    # rest of the script keeps running from the original directory.
    (
        cd third_party/apex
        python setup.py --cpp_ext --cuda_ext bdist_wheel
    )
fi
if [[ "$deepspeed_install" == "1" ]]; then
    # This step only builds the wheel; installation happens further down.
    echo "Building deepspeed wheel"
    python setup.py bdist_wheel
fi
# Install the wheels built above, either on this machine only or on every
# host listed in $hostfile.
# Wheel globs are left unquoted on purpose so they expand to the built files.
if [[ "$local_only" == "1" ]]; then
    # Local install: remove any existing package, then install the new wheel.
    if [[ "$third_party_install" == "1" ]]; then
        echo "Installing apex"
        sudo -H pip uninstall -y apex
        sudo -H pip install third_party/apex/dist/apex*.whl
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        sudo -H pip uninstall -y deepspeed
        sudo -H pip install dist/deepspeed*.whl
        # Import check: prints the installed version, branch and commit.
        python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
        echo "Installation is successful"
    fi
else
    # Multi-node install: copy the wheels to each host with pdcp and run the
    # pip commands there with pdsh.
    if [[ ! -f "$hostfile" ]]; then
        echo "hostfile not found, cannot proceed" >&2
        exit 1
    fi
    # The first column of each non-blank hostfile line is the host name;
    # join them with commas for pdsh/pdcp -w.
    hosts=$(awk 'NF {print $1}' "$hostfile" | paste -sd "," -)
    export PDSH_RCMD_TYPE=ssh
    tmp_wheel_path="/tmp/deepspeed_wheels"

    # NOTE(review): pdsh appears to report remote command failures in its own
    # exit status only when run with -S, so a failed remote step below may not
    # abort this script — confirm whether -S should be added.

    # Start from a staging directory with no stale wheels. rm -f keeps an
    # existing directory that holds no wheels from being reported as an error.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"

    if [[ "$third_party_install" == "1" ]]; then
        echo "Installing apex"
        pdsh -w "$hosts" "sudo -H pip uninstall -y apex"
        pdcp -w "$hosts" third_party/apex/dist/apex*.whl "$tmp_wheel_path/"
        # The wheel glob inside the quoted command is expanded on the remote host.
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/apex*.whl"
        pdsh -w "$hosts" 'python -c "import apex"'
    fi
    if [[ "$deepspeed_install" == "1" ]]; then
        echo "Installing deepspeed"
        pdsh -w "$hosts" "sudo -H pip uninstall -y deepspeed"
        pdcp -w "$hosts" dist/deepspeed*.whl "$tmp_wheel_path/"
        pdsh -w "$hosts" "sudo -H pip install $tmp_wheel_path/deepspeed*.whl"
        pdsh -w "$hosts" "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
        echo "Installation is successful"
    fi

    # Remove the staged wheels and the staging directory from every host.
    pdsh -w "$hosts" "if [ -d $tmp_wheel_path ]; then rm -f $tmp_wheel_path/*.whl; rmdir $tmp_wheel_path; fi"
fi