install_python_libraries.sh 5.85 KB
Newer Older
1
#!/usr/bin/env bash
set -ex

# Clones pplx-kernels and DeepEP, downloads the matching NVSHMEM archive, and
# either installs both projects or builds wheels for them.
#
# usage: ./install_python_libraries.sh [options]
#   --workspace <dir>    workspace directory (default: ./ep_kernels_workspace)
#   --mode <mode>        "install" (default) or "wheel"
#   --pplx-ref <commit>  pplx-kernels commit hash
#   --deepep-ref <commit> DeepEP commit hash
#   --nvshmem-ver <ver>  NVSHMEM version
#
# Each setting below keeps a value already present in the environment and
# falls back to the shown default otherwise; command-line options are applied
# afterwards and take precedence.

CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
PPLX_COMMIT_HASH=${PPLX_COMMIT_HASH:-"12cecfd"}
DEEPEP_COMMIT_HASH=${DEEPEP_COMMIT_HASH:-"73b6ea4"}
NVSHMEM_VER=${NVSHMEM_VER:-"3.3.24"}  # Default supports both CUDA 12 and 13
WORKSPACE=${WORKSPACE:-$(pwd)/ep_kernels_workspace}
MODE=${MODE:-install}

# Major CUDA version taken from the "release <N>" token printed by nvcc.
# The pipeline's status is that of `cut`, so a missing or failing nvcc does
# not abort here; the explicit check below catches an empty/garbled result
# before it ends up in the NVSHMEM archive name.
CUDA_VERSION_MAJOR=$("${CUDA_HOME}/bin/nvcc" --version | grep -Eo "release [0-9]+" | cut -d ' ' -f 2)
if [[ ! "$CUDA_VERSION_MAJOR" =~ ^[0-9]+$ ]]; then
    echo "Error: could not determine the CUDA major version from ${CUDA_HOME}/bin/nvcc." >&2
    exit 1
fi
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53

# Abort the script unless an option is followed by a usable value.
# Arguments: $1 - option name, used in the error message
#            $2 - candidate value; empty or starting with "-" is rejected
# Outputs:   error message on stderr when the value is rejected
require_option_value() {
    local option=$1
    local value=${2-}
    if [[ -z "$value" || "$value" =~ ^- ]]; then
        echo "Error: ${option} requires an argument." >&2
        exit 1
    fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case "$1" in
        --workspace)
            require_option_value "$1" "$2"
            WORKSPACE="$2"
            shift 2
            ;;
        --mode)
            require_option_value "$1" "$2"
            MODE="$2"
            shift 2
            ;;
        --pplx-ref)
            require_option_value "$1" "$2"
            PPLX_COMMIT_HASH="$2"
            shift 2
            ;;
        --deepep-ref)
            require_option_value "$1" "$2"
            DEEPEP_COMMIT_HASH="$2"
            shift 2
            ;;
        --nvshmem-ver)
            require_option_value "$1" "$2"
            if [[ "$2" =~ / ]]; then
                echo "Error: NVSHMEM version should not contain slashes." >&2
                exit 1
            fi
            NVSHMEM_VER="$2"
            shift 2
            ;;
        *)
            echo "Error: Unknown argument '$1'" >&2
            exit 1
            ;;
    esac
done

# MODE may come from --mode or from the environment, so it is checked after
# parsing. Without this check any value other than "install" would silently
# be treated as a wheel build.
case "${MODE:-install}" in
    install|wheel)
        ;;
    *)
        echo "Error: mode must be \"install\" or \"wheel\" (got '${MODE}')." >&2
        exit 1
        ;;
esac
72

73
74
75
76
77
78
79
# NVSHMEM_VER is interpolated into the archive file name and download URL, so
# reject anything that is not a plain version string (letters, digits, dots
# and hyphens) to rule out path traversal.
[[ "$NVSHMEM_VER" =~ ^[a-zA-Z0-9.-]+$ ]] || {
    echo "Error: NVSHMEM_VER contains invalid characters. Only alphanumeric, dots, and hyphens are allowed." >&2
    exit 1
}

80
81
82
83
84
85
# Create the workspace together with the wheel output directory inside it,
# then work from the workspace.
WHEEL_DIR="$WORKSPACE/dist"
mkdir -p "$WORKSPACE" "$WHEEL_DIR"

pushd "$WORKSPACE"

# install dependencies if not installed
if [[ -n "$VIRTUAL_ENV" ]]; then
  uv pip install cmake torch ninja
else
  # No virtualenv is active: pass --system to uv.
  uv pip install --system cmake torch ninja
fi

94
95
96
97
98
99
100
101
102
103
104
# fetch nvshmem
# Map the (lower-cased) machine architecture onto the directory name used in
# the NVSHMEM download URL.
ARCH=$(uname -m)
case "${ARCH,,}" in
  x86_64|amd64)
    NVSHMEM_SUBDIR="linux-x86_64"
    ;;
  aarch64|arm64)
    NVSHMEM_SUBDIR="linux-sbsa"
    ;;
  *)
    echo "Unsupported architecture: ${ARCH}" >&2
    exit 1
    ;;
esac

NVSHMEM_FILE="libnvshmem-${NVSHMEM_SUBDIR}-${NVSHMEM_VER}_cuda${CUDA_VERSION_MAJOR}-archive.tar.xz"
NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"

pushd "$WORKSPACE"
echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
curl -fSL "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
tar -xf "${NVSHMEM_FILE}"
# Remove a tree left behind by an earlier run first: with an existing
# "nvshmem" directory, mv would place the new archive *inside* it and the
# stale copy would keep being used.
rm -rf -- nvshmem
mv -- "${NVSHMEM_FILE%.tar.xz}" nvshmem
rm -f -- "${NVSHMEM_FILE}"
rm -rf nvshmem/lib/bin nvshmem/lib/share
popd

# Prepend the NVSHMEM CMake directory; the ":" separator is only added when
# CMAKE_PREFIX_PATH already had a value, avoiding a trailing empty entry.
export CMAKE_PREFIX_PATH="$WORKSPACE/nvshmem/lib/cmake${CMAKE_PREFIX_PATH:+:$CMAKE_PREFIX_PATH}"
122

123
124
125
# Report whether a directory is a git checkout with uncommitted changes.
# Arguments: $1 - directory to inspect
# Returns:   0 if $1 contains a .git directory and `git status --porcelain`
#            prints anything; 1 otherwise (including when git itself fails).
is_git_dirty() {
    local dir=$1
    local changes

    [ -d "$dir/.git" ] || return 1

    # git's diagnostics are intentionally discarded (fd 2, not fd 3): only the
    # porcelain output decides the result. `|| true` keeps a failing git from
    # tripping `set -e` when the function is called outside a condition.
    changes=$(git -C "$dir" status --porcelain 2>/dev/null) || true
    [ -n "$changes" ]
}

# Make sure a checkout of a repository exists in the current directory.
# Arguments: $1 - repository URL
#            $2 - directory to clone into
#            $3 - file (relative to $2) whose presence marks a complete clone
#            $4 - commit to check out after cloning (optional)
# Behaviour: a dirty checkout, or one that has both .git and the key file, is
#            left untouched (no clone, no checkout); anything else is removed
#            and cloned again.
clone_repo() {
    local repo_url=$1
    local dir_name=$2
    local key_file=$3
    local commit_hash=$4

    if [ -d "$dir_name" ]; then
        if is_git_dirty "$dir_name"; then
            echo "$dir_name directory is dirty, skipping clone"
            return 0
        fi
        if [ -d "$dir_name/.git" ] && [ -f "$dir_name/$key_file" ]; then
            echo "$dir_name directory exists and appears complete"
            return 0
        fi
        echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
        rm -rf -- "${dir_name:?}"
    fi

    # Name the target explicitly so the checkout lands in $dir_name even when
    # the URL's last path component differs from it.
    git clone "$repo_url" "$dir_name" || return
    if [ -n "$commit_hash" ]; then
        # -C avoids changing (and possibly failing to restore) the caller's cwd.
        git -C "$dir_name" checkout "$commit_hash" || return
    fi
}

165
166
167
168
169
170
# Clone one project into $WORKSPACE (if needed) and install it or build a
# wheel for it, depending on $MODE.
# Globals:   WORKSPACE, MODE, WHEEL_DIR, CUDA_VERSION_MAJOR, CUDA_HOME (read)
# Arguments: $1 - repository URL
#            $2 - checkout directory name inside $WORKSPACE
#            $3 - key file handed to clone_repo
#            $4 - commit to check out
#            $5 - shell snippet placed in front of the build command and run
#                 through eval (e.g. "export FOO=bar; "); may be empty
do_build() {
    local repo=$1
    local name=$2
    local key=$3
    local commit=$4
    local extra_env=$5

    pushd "$WORKSPACE" || return
    clone_repo "$repo" "$name" "$key" "$commit"
    cd "$name" || { popd; return 1; }

    # DeepEP CUDA 13 patch: append CUDA's cccl include directory after the
    # NVSHMEM include entry in setup.py.
    if [[ "$name" == "DeepEP" && "${CUDA_VERSION_MAJOR}" -ge 13 ]]; then
        sed -i "s|f'{nvshmem_dir}/include']|f'{nvshmem_dir}/include', '${CUDA_HOME}/include/cccl']|" "setup.py"
    fi

    # NOTE(review): extra_env is executed as shell code, so callers must only
    # pass trusted strings. The build command itself is handed to eval
    # single-quoted so that "$WHEEL_DIR" is expanded (and stays one word)
    # at eval time instead of being re-split when it contains spaces.
    if [ "$MODE" = "install" ]; then
        echo "Installing $name into environment"
        eval "$extra_env" 'uv pip install --no-build-isolation -vvv .'
    else
        echo "Building $name wheel into $WHEEL_DIR"
        eval "$extra_env" 'uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .'
    fi
    popd
}

# build pplx-kernels
do_build \
    "https://github.com/ppl-ai/pplx-kernels" \
    "pplx-kernels" \
    "setup.py" \
    "$PPLX_COMMIT_HASH" \
    ""

# build DeepEP
# do_build runs its last argument through eval, so the NVSHMEM path is
# shell-quoted with printf %q to survive a workspace path containing spaces
# or other shell metacharacters.
do_build \
    "https://github.com/deepseek-ai/DeepEP" \
    "DeepEP" \
    "setup.py" \
    "$DEEPEP_COMMIT_HASH" \
    "export NVSHMEM_DIR=$(printf '%q' "$WORKSPACE/nvshmem"); "

# In wheel mode, show what ended up in the output directory.
if [ "$MODE" = "wheel" ]; then
    echo "All wheels written to $WHEEL_DIR"
    ls -l "$WHEEL_DIR"
fi