#!/usr/bin/env bash
#
# Downloads a prebuilt NVSHMEM archive into a workspace directory and builds
# DeepEP against it, either installing DeepEP into the current Python
# environment or writing a wheel into <workspace>/dist.
#
# usage: ./install_python_libraries.sh [options]
#   --workspace <dir>     workspace directory (default: ./ep_kernels_workspace)
#   --mode <mode>         "install" (default) or "wheel"
#   --deepep-ref <commit> DeepEP commit hash (default: 73b6ea4)
#   --nvshmem-ver <ver>   NVSHMEM version (default: 3.3.24)
#
# The environment variables CUDA_HOME, WORKSPACE, MODE, DEEPEP_COMMIT_HASH and
# NVSHMEM_VER seed the defaults; command-line options take precedence.

# -e: abort on the first failing command, -x: trace every command.
set -ex
# Without pipefail only the last stage of a pipeline decides its status, which
# would hide a missing or failing nvcc in the CUDA version probe.
set -o pipefail

# Build configuration. Every value may be pre-seeded through the environment;
# the command-line options parsed further down override these defaults.
CUDA_HOME=${CUDA_HOME:-/usr/local/cuda}
DEEPEP_COMMIT_HASH=${DEEPEP_COMMIT_HASH:-"73b6ea4"}
NVSHMEM_VER=${NVSHMEM_VER:-"3.3.24"}  # Default supports both CUDA 12 and 13
WORKSPACE=${WORKSPACE:-$(pwd)/ep_kernels_workspace}
MODE=${MODE:-install}

# Major CUDA version taken from the "release <major>.<minor>" text printed by
# nvcc. It selects the NVSHMEM archive and gates the DeepEP CUDA 13 patch.
# The fallback keeps control here so the failure is reported clearly below.
CUDA_VERSION_MAJOR=$("${CUDA_HOME}"/bin/nvcc --version | grep -E -o "release [0-9]+" | cut -d ' ' -f 2) || CUDA_VERSION_MAJOR=""
if [[ ! "$CUDA_VERSION_MAJOR" =~ ^[0-9]+$ ]]; then
    # An empty value would otherwise yield a nonexistent "..._cuda-archive"
    # file name and only fail later, at download time.
    echo "Error: could not determine the CUDA major version from ${CUDA_HOME}/bin/nvcc." >&2
    exit 1
fi
#######################################
# Abort the script unless an option was followed by a usable value.
# Arguments: $1 - option name, used in the error message
#            $2 - candidate value (may be missing)
# Outputs:   error message on stderr when the value is empty or looks like
#            another option (starts with "-")
# Returns:   0 when the value is acceptable; exits with status 1 otherwise
#######################################
require_value() {
    local opt=$1
    local val=${2-}
    if [[ -z "$val" || "$val" =~ ^- ]]; then
        echo "Error: ${opt} requires an argument." >&2
        exit 1
    fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
    case $1 in
        --workspace)
            require_value "$1" "${2-}"
            WORKSPACE="$2"
            shift 2
            ;;
        --mode)
            require_value "$1" "${2-}"
            MODE="$2"
            shift 2
            ;;
        --deepep-ref)
            require_value "$1" "${2-}"
            DEEPEP_COMMIT_HASH="$2"
            shift 2
            ;;
        --nvshmem-ver)
            require_value "$1" "${2-}"
            # The version is embedded in a file name and URL, so reject path
            # separators right away with a specific message.
            if [[ "$2" =~ / ]]; then
                echo "Error: NVSHMEM version should not contain slashes." >&2
                exit 1
            fi
            NVSHMEM_VER="$2"
            shift 2
            ;;
        *)
            echo "Error: Unknown argument '$1'" >&2
            exit 1
            ;;
    esac
done

# Guard against path traversal through NVSHMEM_VER, whichever way it was set:
# the value must consist solely of letters, digits, dots and hyphens (the
# characters a version string normally uses).
[[ "$NVSHMEM_VER" =~ ^[a-zA-Z0-9.-]+$ ]] || {
    echo "Error: NVSHMEM_VER contains invalid characters. Only alphanumeric, dots, and hyphens are allowed." >&2
    exit 1
}

# Create the workspace, then resolve it to an absolute path. The script
# changes into the workspace and later refers to "$WORKSPACE" again (another
# pushd, exported paths), which only works when the path does not depend on
# the current directory. CDPATH is cleared so cd cannot print or pick another
# directory.
mkdir -p -- "$WORKSPACE"
WORKSPACE=$(CDPATH= cd -- "$WORKSPACE" && pwd)

# Wheels built in "wheel" mode are collected here.
WHEEL_DIR="$WORKSPACE/dist"
mkdir -p -- "$WHEEL_DIR"

pushd "$WORKSPACE"

# install dependencies if not installed
# With no active virtualenv ($VIRTUAL_ENV empty or unset) uv is invoked with
# --system; inside a virtualenv the packages go into that environment.
if [[ -z "${VIRTUAL_ENV:-}" ]]; then
  uv pip install --system cmake torch ninja
else
  uv pip install cmake torch ninja
fi

# fetch nvshmem
# Map the machine architecture reported by uname (compared case-insensitively)
# to the platform directory name used in the NVSHMEM download URL.
ARCH=$(uname -m)
case "${ARCH,,}" in
  x86_64|amd64)
    NVSHMEM_SUBDIR="linux-x86_64"
    ;;
  aarch64|arm64)
    NVSHMEM_SUBDIR="linux-sbsa"
    ;;
  *)
    echo "Unsupported architecture: ${ARCH}" >&2
    exit 1
    ;;
esac

# Download the NVSHMEM archive matching the platform, requested version and
# CUDA major version, and unpack it as $WORKSPACE/nvshmem.
NVSHMEM_FILE="libnvshmem-${NVSHMEM_SUBDIR}-${NVSHMEM_VER}_cuda${CUDA_VERSION_MAJOR}-archive.tar.xz"
NVSHMEM_URL="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/${NVSHMEM_SUBDIR}/${NVSHMEM_FILE}"

pushd "$WORKSPACE"
echo "Downloading NVSHMEM ${NVSHMEM_VER} for ${NVSHMEM_SUBDIR} ..."
# -f turns HTTP errors into a non-zero exit so set -e stops the script.
curl -fSL --retry 3 --retry-delay 2 "${NVSHMEM_URL}" -o "${NVSHMEM_FILE}"
tar -xf "${NVSHMEM_FILE}"
# Replace any earlier extraction; the unpacked directory is expected to be
# named after the archive without its .tar.xz suffix.
rm -rf nvshmem
mv "${NVSHMEM_FILE%.tar.xz}" nvshmem
rm -f "${NVSHMEM_FILE}"
rm -rf nvshmem/lib/bin nvshmem/lib/share
popd

# Prepend the NVSHMEM CMake package directory. The ":" separator is added only
# when CMAKE_PREFIX_PATH already had a value, avoiding an empty trailing entry.
export CMAKE_PREFIX_PATH="$WORKSPACE/nvshmem/lib/cmake${CMAKE_PREFIX_PATH:+:$CMAKE_PREFIX_PATH}"

#######################################
# Report whether a directory is a git checkout with uncommitted changes.
# Arguments: $1 - directory to inspect
# Outputs:   nothing; git's own diagnostics are discarded
# Returns:   0 when "$1/.git" is a directory and `git status --porcelain`
#            prints at least one entry; 1 otherwise (no .git directory,
#            clean tree, or git failing)
#######################################
is_git_dirty() {
    local dir=$1
    local changes

    [[ -d "$dir/.git" ]] || return 1

    # stderr is file descriptor 2; it is silenced so a broken checkout does
    # not spam the log. git -C inspects "$dir" without changing the caller's
    # working directory.
    changes=$(git -C "$dir" status --porcelain 2>/dev/null) || return 1
    [[ -n "$changes" ]]
}

#######################################
# Clone a repository into an explicit directory and optionally check out a ref.
# Arguments: $1 - repository URL
#            $2 - destination directory
#            $3 - commit/ref to check out; empty keeps the default branch
# Returns:   0 on success, the failing git command's status otherwise
#######################################
_clone_at_commit() {
    local repo_url=$1
    local dir_name=$2
    local commit_hash=$3

    # Naming the destination keeps it in sync with what the caller inspects,
    # instead of relying on the URL's last path component.
    git clone "$repo_url" "$dir_name" || return
    if [[ -n "$commit_hash" ]]; then
        # git -C avoids cd/cd .., which ends in the wrong directory when
        # dir_name has more than one path component.
        git -C "$dir_name" checkout "$commit_hash" || return
    fi
}

#######################################
# Make sure a usable checkout of a repository exists in the current directory.
# Arguments: $1 - repository URL
#            $2 - directory name of the checkout
#            $3 - file (relative to $2) whose presence marks a complete clone
#            $4 - optional commit/ref to check out after a fresh clone
# Outputs:   a status line on stdout when the directory already exists
# Returns:   0 on success, non-zero when cloning or checkout fails
# Notes:     an existing checkout with local changes is left untouched; an
#            existing complete checkout is reused as-is (the ref is not
#            re-checked-out); an incomplete one is deleted and cloned again.
#######################################
clone_repo() {
    local repo_url=$1
    local dir_name=$2
    local key_file=$3
    local commit_hash=$4

    if [[ ! -d "$dir_name" ]]; then
        _clone_at_commit "$repo_url" "$dir_name" "$commit_hash"
        return
    fi

    if is_git_dirty "$dir_name"; then
        echo "$dir_name directory is dirty, skipping clone"
    elif [[ ! -d "$dir_name/.git" || ! -f "$dir_name/$key_file" ]]; then
        echo "$dir_name directory exists but clone appears incomplete, cleaning up and re-cloning"
        rm -rf -- "$dir_name"
        _clone_at_commit "$repo_url" "$dir_name" "$commit_hash"
    else
        echo "$dir_name directory exists and appears complete"
    fi
}

#######################################
# Clone a project into the workspace and install it or build its wheel.
# Globals:   WORKSPACE, MODE, WHEEL_DIR, CUDA_VERSION_MAJOR, CUDA_HOME (read)
# Arguments: $1 - repository URL
#            $2 - checkout directory name inside the workspace
#            $3 - file whose presence marks a complete clone
#            $4 - commit/ref to check out (may be empty)
#            $5 - shell snippet evaluated in front of the build command,
#                 e.g. "export VAR=value; " or "VAR=value"
# Outputs:   progress messages on stdout plus the build tool's own output
#######################################
do_build() {
    local repo=$1
    local name=$2
    local key=$3
    local commit=$4
    local extra_env=$5

    pushd "$WORKSPACE"
    clone_repo "$repo" "$name" "$key" "$commit"
    cd "$name"

    # DeepEP CUDA 13 patch: append ${CUDA_HOME}/include/cccl to the include
    # list in setup.py. Once applied the pattern no longer matches, so
    # re-running on a patched checkout is a no-op.
    if [[ "$name" == "DeepEP" && "${CUDA_VERSION_MAJOR}" -ge 13 ]]; then
        sed -i "s|f'{nvshmem_dir}/include']|f'{nvshmem_dir}/include', '${CUDA_HOME}/include/cccl']|" "setup.py"
    fi

    # eval is needed only so that $extra_env can be a prefix or an export
    # statement. The command itself is single-quoted, so "$WHEEL_DIR" is
    # expanded once, by eval, and stays one word even with spaces in it.
    if [[ "$MODE" == "install" ]]; then
        echo "Installing $name into environment"
        eval "$extra_env" 'uv pip install --no-build-isolation -vvv .'
    else
        echo "Building $name wheel into $WHEEL_DIR"
        eval "$extra_env" 'uv build --wheel --no-build-isolation -vvv --out-dir "$WHEEL_DIR" .'
    fi
    popd
}

# build DeepEP
# The last argument is a shell snippet that do_build passes through eval, so
# the NVSHMEM path is shell-quoted first; an ordinary path is left unchanged
# by %q, while one containing spaces or metacharacters stays a single word.
printf -v nvshmem_dir_quoted '%q' "$WORKSPACE/nvshmem"
do_build \
    "https://github.com/deepseek-ai/DeepEP" \
    "DeepEP" \
    "setup.py" \
    "$DEEPEP_COMMIT_HASH" \
    "export NVSHMEM_DIR=${nvshmem_dir_quoted}; "

# In wheel mode, finish by listing what was produced.
if [ "$MODE" = "wheel" ]; then
    echo "All wheels written to $WHEEL_DIR"
    ls -l "$WHEEL_DIR"
fi