#!/usr/bin/env bash
#
# Fetches NVSHMEM, then builds the pplx-kernels and DeepEP Python packages and
# either installs them or writes their wheels, depending on the selected mode.

# -e: abort on the first failing command; -x: trace every command as it runs.
set -ex

# usage: ./install_python_libraries.sh [options]
#   --workspace <dir>    workspace directory (default: ./ep_kernels_workspace)
#   --mode <mode>        "install" (default) or "wheel"
#   --pplx-ref <commit>  pplx-kernels commit hash
#   --deepep-ref <commit> DeepEP commit hash

# Configuration defaults. A non-empty value already present in the environment
# wins; NVSHMEM_VER is pinned unconditionally.
: "${CUDA_HOME:=/usr/local/cuda}"
: "${PPLX_COMMIT_HASH:=12cecfd}"
: "${DEEPEP_COMMIT_HASH:=73b6ea4}"
NVSHMEM_VER=3.3.9
: "${WORKSPACE:=$(pwd)/ep_kernels_workspace}"
: "${MODE:=install}"

# Parse arguments
#######################################
# Parse command-line options into the global configuration variables.
# Globals:   WORKSPACE, MODE, PPLX_COMMIT_HASH, DEEPEP_COMMIT_HASH (written)
# Arguments: the script's command-line arguments
# Exits:     1 on an unknown option, a missing option value, or a mode other
#            than "install" / "wheel"
#######################################
parse_args() {
    local opt
    while [[ $# -gt 0 ]]; do
        opt=$1
        case "$opt" in
            --workspace|--mode|--pplx-ref|--deepep-ref)
                # Every option takes a value, and the value must not look
                # like another option.
                if [[ -z "${2:-}" || "$2" =~ ^- ]]; then
                    echo "Error: $opt requires an argument." >&2
                    exit 1
                fi
                case "$opt" in
                    --workspace)  WORKSPACE="$2" ;;
                    --mode)       MODE="$2" ;;
                    --pplx-ref)   PPLX_COMMIT_HASH="$2" ;;
                    --deepep-ref) DEEPEP_COMMIT_HASH="$2" ;;
                esac
                shift 2
                ;;
            *)
                echo "Error: Unknown argument '$opt'" >&2
                exit 1
                ;;
        esac
    done

    # Reject unknown modes up front; otherwise any value other than "install"
    # would silently be treated as a wheel build.
    case "${MODE:-install}" in
        install|wheel) ;;
        *)
            echo "Error: mode must be 'install' or 'wheel' (got '${MODE}')." >&2
            exit 1
            ;;
    esac
}

parse_args "$@"

# Create the workspace and resolve it to an absolute path: the script changes
# directory below and later re-enters "$WORKSPACE", which would fail for a
# relative --workspace value once the current directory has moved.
mkdir -p "$WORKSPACE"
WORKSPACE=$(cd "$WORKSPACE" && pwd)

# Wheels built in "wheel" mode are collected here.
WHEEL_DIR="$WORKSPACE/dist"
mkdir -p "$WHEEL_DIR"

pushd "$WORKSPACE"

# install dependencies if not installed
# With no active virtualenv, pass --system to uv; inside one, install into it.
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
  uv pip install --system cmake torch ninja
else
  uv pip install cmake torch ninja
fi

# fetch nvshmem
# Map the machine architecture to the platform directory used in the NVSHMEM
# redist URL; the archive name is derived from that directory and the version.
ARCH=$(uname -m)
case "${ARCH,,}" in
  x86_64|amd64)
    NVSHMEM_SUBDIR="linux-x86_64"
    ;;
  aarch64|arm64)
    NVSHMEM_SUBDIR="linux-sbsa"
    ;;
  *)
    echo "Unsupported architecture: ${ARCH}" >&2
    exit 1
    ;;
esac

NVSHMEM_FILE="libnvshmem-${NVSHMEM_SUBDIR}-${NVSHMEM_VER}_cuda12-archive.tar.xz"
NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"

# Download the NVSHMEM archive and unpack it as "$WORKSPACE/nvshmem".
pushd "$WORKSPACE"
echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
tar -xf "${NVSHMEM_FILE}"
# Drop any copy left by a previous run; with an existing nvshmem/ directory,
# mv would nest the freshly extracted tree inside it instead of replacing it.
rm -rf nvshmem
mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
rm -f "${NVSHMEM_FILE}"
# Remove parts of the extracted tree that the build does not use.
rm -rf nvshmem/lib/bin nvshmem/lib/share
popd

# Put the NVSHMEM CMake package directory first on CMake's search path. Any
# pre-existing value is appended only when non-empty, so no trailing ':' is
# left behind.
export CMAKE_PREFIX_PATH="$WORKSPACE/nvshmem/lib/cmake${CMAKE_PREFIX_PATH:+:$CMAKE_PREFIX_PATH}"

#######################################
# Report whether a directory is a git checkout with uncommitted changes.
# Arguments: $1 - directory to inspect
# Returns:   0 if "$1/.git" is a directory and `git status --porcelain`
#            prints anything; 1 otherwise (including when git itself fails)
#######################################
is_git_dirty() {
    local dir=$1

    [[ -d "$dir/.git" ]] || return 1
    # git errors are deliberately discarded: a checkout git cannot read is
    # reported as "not dirty" and left for the caller's other checks.
    [[ -n "$(git -C "$dir" status --porcelain 2>/dev/null)" ]]
}

#######################################
# Ensure a repository checkout exists in the current directory.
# An existing checkout is kept when it is dirty or looks complete; an
# incomplete one is deleted and cloned again.
# Arguments: $1 - repository URL
#            $2 - checkout directory name (relative to the current directory)
#            $3 - file expected inside a complete checkout
#            $4 - commit to check out after cloning (optional; empty = skip)
# Outputs:   status messages on stdout
#######################################
clone_repo() {
    local repo_url=$1
    local dir_name=$2
    local key_file=$3
    local commit_hash=$4

    if [[ -d "$dir_name" ]]; then
        if is_git_dirty "$dir_name"; then
            echo "$dir_name directory is dirty, skipping clone"
            return 0
        fi
        if [[ -d "$dir_name/.git" && -f "$dir_name/$key_file" ]]; then
            echo "$dir_name directory exists and appears complete"
            return 0
        fi
        echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
        # ":?" aborts instead of expanding to an empty path.
        rm -rf -- "${dir_name:?}"
    fi

    # Name the target explicitly so the checkout lands in $dir_name even when
    # the URL's last path component differs from it.
    git clone "$repo_url" "$dir_name"
    if [[ -n "$commit_hash" ]]; then
        git -C "$dir_name" checkout "$commit_hash"
    fi
}

#######################################
# Patch ./setup.py so that, with CUDA 13 or newer, the CUDA "cccl" include
# directory is added next to the NVSHMEM include directory.
# Globals:   CUDA_HOME (read)
# Arguments: none; operates on setup.py in the current directory
# Returns:   0, also when the CUDA version cannot be determined (the patch is
#            then skipped with a warning)
#######################################
deepep_cuda13_patch() {
    local cuda_version_major
    cuda_version_major=$("${CUDA_HOME}/bin/nvcc" --version \
        | grep -Eo "release [0-9]+" \
        | cut -d ' ' -f 2)

    if [[ -z "$cuda_version_major" ]]; then
        echo "Warning: could not determine CUDA version from ${CUDA_HOME}/bin/nvcc; skipping cccl patch" >&2
        return 0
    fi

    if (( cuda_version_major >= 13 )); then
        sed -i "s|f'{nvshmem_dir}/include']|f'{nvshmem_dir}/include', '${CUDA_HOME}/include/cccl']|" "setup.py"
    fi
}

#######################################
# Clone one project into the workspace and install it or build its wheel.
# Globals:   WORKSPACE, MODE, WHEEL_DIR (read)
# Arguments: $1 - repository URL
#            $2 - checkout directory name; "DeepEP" additionally gets the
#                 CUDA 13 setup.py patch
#            $3 - file expected inside a complete checkout
#            $4 - commit to check out
#            $5 - shell snippet evaluated in front of the build command
#                 (e.g. "export VAR=value; "); may be empty
# Outputs:   progress messages and the pushd/popd directory stack on stdout
#######################################
do_build() {
    local repo=$1
    local name=$2
    local key=$3
    local commit=$4
    local extra_env=$5
    local -a build_cmd

    pushd "$WORKSPACE"
    clone_repo "$repo" "$name" "$key" "$commit"
    cd "$name"

    if [[ "$name" == "DeepEP" ]]; then
        deepep_cuda13_patch
    fi

    if [[ "$MODE" == "install" ]]; then
        echo "Installing $name into environment"
        build_cmd=(uv pip install --no-build-isolation -vvv .)
    else
        echo "Building $name wheel into $WHEEL_DIR"
        build_cmd=(uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .)
    fi

    # eval re-parses its arguments, so the command words are shell-quoted
    # first; otherwise a WHEEL_DIR containing spaces would be split apart.
    eval "$extra_env" "$(printf '%q ' "${build_cmd[@]}")"
    popd
}

# build pplx-kernels
# No extra environment is needed, hence the empty last argument.
do_build \
    "https://github.com/ppl-ai/pplx-kernels" \
    "pplx-kernels" \
    "setup.py" \
    "$PPLX_COMMIT_HASH" \
    ""

# build DeepEP
# The last argument is evaluated by do_build as shell code, so the NVSHMEM path
# is shell-quoted to survive a workspace path containing spaces.
do_build \
    "https://github.com/deepseek-ai/DeepEP" \
    "DeepEP" \
    "setup.py" \
    "$DEEPEP_COMMIT_HASH" \
    "export NVSHMEM_DIR=$(printf '%q' "$WORKSPACE/nvshmem"); "

# In wheel mode, report the output directory and list what was written there.
case "$MODE" in
    wheel)
        echo "All wheels written to $WHEEL_DIR"
        ls -l "$WHEEL_DIR"
        ;;
esac