You need to sign in or sign up before continuing.
Unverified Commit 00825428 authored by Jeff Rasley's avatar Jeff Rasley Committed by GitHub
Browse files

update install to use pdcp to distribute wheels (#12)

update install to use pdcp to distribute wheels
parent d6846203
...@@ -18,6 +18,7 @@ hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install loc ...@@ -18,6 +18,7 @@ hostfile (hostfile: /job/hostfile). If no hostfile exists, will only install loc
-d, --deepspeed_only Install only deepspeed and no third party dependencies -d, --deepspeed_only Install only deepspeed and no third party dependencies
-t, --third_party_only Install only third party dependencies and not deepspeed -t, --third_party_only Install only third party dependencies and not deepspeed
-l, --local_only Installs only on local machine -l, --local_only Installs only on local machine
-H, --hostfile Path to MPI-style hostfile (default: /job/hostfile)
-h, --help This help text -h, --help This help text
""" """
} }
...@@ -28,6 +29,7 @@ deepspeed_install=1 ...@@ -28,6 +29,7 @@ deepspeed_install=1
third_party_install=1 third_party_install=1
local_only=0 local_only=0
entire_dlts_job=1 entire_dlts_job=1
hostfile=/job/hostfile
while [[ $# -gt 0 ]] while [[ $# -gt 0 ]]
do do
...@@ -49,6 +51,15 @@ case $key in ...@@ -49,6 +51,15 @@ case $key in
local_only=1; local_only=1;
shift shift
;; ;;
-H|--hostfile)
hostfile=$2
if [ ! -f $2 ]; then
echo "User provided hostfile does not exist at $hostfile, exiting"
exit 1
fi
shift
shift
;;
-h|--help) -h|--help)
usage usage
exit 0 exit 0
...@@ -75,13 +86,13 @@ cat deepspeed/version_info.py ...@@ -75,13 +86,13 @@ cat deepspeed/version_info.py
install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex' install_apex='sudo -H pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" third_party/apex'
if [ ! -f /job/hostfile ]; then if [ ! -f $hostfile ]; then
echo "No hostfile exists at /job/hostfile, installing locally" echo "No hostfile exists at $hostfile, installing locally"
local_only=1 local_only=1
fi fi
if [ "$local_only" == "1" ]; then # Build wheels
if [ "$third_party_install" == "1" ]; then if [ "$third_party_install" == "1" ]; then
echo "Checking out sub-module(s)" echo "Checking out sub-module(s)"
git submodule update --init --recursive git submodule update --init --recursive
...@@ -89,23 +100,28 @@ if [ "$local_only" == "1" ]; then ...@@ -89,23 +100,28 @@ if [ "$local_only" == "1" ]; then
cd third_party/apex cd third_party/apex
python setup.py --cpp_ext --cuda_ext bdist_wheel python setup.py --cpp_ext --cuda_ext bdist_wheel
cd - cd -
fi
if [ "$deepspeed_install" == "1" ]; then
echo "Installing deepspeed"
python setup.py bdist_wheel
fi
if [ "$local_only" == "1" ]; then
if [ "$third_party_install" == "1" ]; then
echo "Installing apex" echo "Installing apex"
sudo -H pip uninstall -y apex sudo -H pip uninstall -y apex
sudo -H pip install third_party/apex/dist/apex*.whl sudo -H pip install third_party/apex/dist/apex*.whl
fi fi
if [ "$deepspeed_install" == "1" ]; then if [ "$deepspeed_install" == "1" ]; then
echo "Installing deepspeed" echo "Installing deepspeed"
python setup.py bdist_wheel
sudo -H pip uninstall -y deepspeed sudo -H pip uninstall -y deepspeed
sudo -H pip install dist/deepspeed*.whl sudo -H pip install dist/deepspeed*.whl
python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)' python -c 'import deepspeed; print("deepspeed info:", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'
echo "Installation is successful" echo "Installation is successful"
fi fi
else else
local_path=`pwd` local_path=`pwd`
hostfile=/job/hostfile
if [ -f $hostfile ]; then if [ -f $hostfile ]; then
hosts=`cat $hostfile | awk '{print $1}' | paste -sd "," -`; hosts=`cat $hostfile | awk '{print $1}' | paste -sd "," -`;
else else
...@@ -113,25 +129,22 @@ else ...@@ -113,25 +129,22 @@ else
exit 1 exit 1
fi fi
export PDSH_RCMD_TYPE=ssh; export PDSH_RCMD_TYPE=ssh;
tmp_wheel_path="/tmp/deepspeed_wheels"
pdsh -w $hosts "if [ -d $tmp_wheel_path ]; then rm $tmp_wheel_path/*.whl; else mkdir -pv $tmp_wheel_path; fi"
if [ "$third_party_install" == "1" ]; then if [ "$third_party_install" == "1" ]; then
echo "Checking out sub-module(s)"
git submodule update --init --recursive
echo "Installing apex"
cd third_party/apex
python setup.py --cpp_ext --cuda_ext bdist_wheel
cd -
pdsh -w $hosts "sudo -H pip uninstall -y apex" pdsh -w $hosts "sudo -H pip uninstall -y apex"
pdsh -w $hosts "cd $local_path; sudo -H pip install third_party/apex/dist/apex*.whl" pdcp -w $hosts third_party/apex/dist/apex*.whl $tmp_wheel_path/
pdsh -w $hosts "sudo -H pip install $tmp_wheel_path/apex*.whl"
pdsh -w $hosts 'python -c "import apex"' pdsh -w $hosts 'python -c "import apex"'
fi fi
if [ "$deepspeed_install" == "1" ]; then if [ "$deepspeed_install" == "1" ]; then
echo "Installing deepspeed" echo "Installing deepspeed"
python setup.py bdist_wheel
pdsh -w $hosts "sudo -H pip uninstall -y deepspeed" pdsh -w $hosts "sudo -H pip uninstall -y deepspeed"
pdsh -w $hosts "cd $local_path; sudo -H pip install dist/deepspeed*.whl" pdcp -w $hosts dist/deepspeed*.whl $tmp_wheel_path/
pdsh -w $hosts "sudo -H pip install $tmp_wheel_path/deepspeed*.whl"
pdsh -w $hosts "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'" pdsh -w $hosts "python -c 'import deepspeed; print(\"deepspeed info:\", deepspeed.__version__, deepspeed.__git_branch__, deepspeed.__git_hash__)'"
echo "Installation is successful" echo "Installation is successful"
fi fi
pdsh -w $hosts "if [ -d $tmp_wheel_path ]; then rm $tmp_wheel_path/*.whl; rmdir $tmp_wheel_path; fi"
fi fi
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment