Unverified commit 4b7ca37b, authored by R3hankhan and committed by GitHub
Browse files

[CPU][IBM Z][Dockerfile][Docs] Fix s390x builds for torch 2.11 and update docs for s390x (#39910)


Signed-off-by: Rehan Khan <Rehan.Khan7@ibm.com>
parent 445b7093
...@@ -147,6 +147,9 @@ struct AttentionMetadata { ...@@ -147,6 +147,9 @@ struct AttentionMetadata {
case ISA::NEON: case ISA::NEON:
ss << "NEON, "; ss << "NEON, ";
break; break;
case ISA::VXE:
ss << "VXE, ";
break;
} }
ss << "workitem_group_num: " << workitem_group_num ss << "workitem_group_num: " << workitem_group_num
<< ", reduction_item_num: " << reduction_item_num << ", reduction_item_num: " << reduction_item_num
......
...@@ -54,12 +54,34 @@ struct Counter { ...@@ -54,12 +54,34 @@ struct Counter {
}; };
inline int64_t get_available_l2_size() { inline int64_t get_available_l2_size() {
#if defined(__s390x__)
static int64_t size = []() {
uint32_t l2_cache_size = 0;
auto caps = at::cpu::get_cpu_capabilities();
auto it = caps.find("l2_cache_size");
if (it != caps.end()) {
l2_cache_size = static_cast<uint32_t>(it->second.toInt());
}
if (l2_cache_size == 0) {
long sys_l2 = sysconf(_SC_LEVEL2_CACHE_SIZE);
if (sys_l2 > 0) {
l2_cache_size = static_cast<uint32_t>(sys_l2);
}
}
if (l2_cache_size == 0) {
l2_cache_size = 256 * 1024;
}
return static_cast<int64_t>(l2_cache_size) >> 1; // use 50% of L2 cache
}();
return size;
#else
static int64_t size = []() { static int64_t size = []() {
auto caps = at::cpu::get_cpu_capabilities(); auto caps = at::cpu::get_cpu_capabilities();
const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt(); const uint32_t l2_cache_size = caps.at("l2_cache_size").toInt();
return l2_cache_size >> 1; // use 50% of L2 cache return l2_cache_size >> 1; // use 50% of L2 cache
}(); }();
return size; return size;
#endif
} }
template <int32_t alignment_v, typename T> template <int32_t alignment_v, typename T>
......
...@@ -42,7 +42,7 @@ FROM python-install AS pyarrow ...@@ -42,7 +42,7 @@ FROM python-install AS pyarrow
# Build Apache Arrow # Build Apache Arrow
WORKDIR /tmp WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
git clone https://github.com/apache/arrow.git && \ git clone https://github.com/apache/arrow.git -b maint-19.0.1 && \
cd arrow/cpp && \ cd arrow/cpp && \
mkdir release && cd release && \ mkdir release && cd release && \
cmake -DCMAKE_BUILD_TYPE=Release \ cmake -DCMAKE_BUILD_TYPE=Release \
...@@ -68,19 +68,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -68,19 +68,6 @@ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -r requirements-build.txt && \ uv pip install -r requirements-build.txt && \
python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel
FROM python-install AS numa-build
# Install numactl (needed for numa.h dependency)
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
tar -xvzf v2.0.16.tar.gz && \
cd numactl-2.0.16 && \
./autogen.sh && \
./configure && \
make
# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
FROM python-install AS rust FROM python-install AS rust
ENV CARGO_HOME=/root/.cargo ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup ENV RUSTUP_HOME=/root/.rustup
...@@ -91,6 +78,18 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \ ...@@ -91,6 +78,18 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
rustup default stable && \ rustup default stable && \
rustup show rustup show
FROM python-install AS numa-build
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.19.tar.gz && \
tar -xvzf v2.0.19.tar.gz && \
cd numactl-2.0.19 && \
./autogen.sh && \
./configure && \
make
# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
FROM python-install AS torch-vision FROM python-install AS torch-vision
# Install torchvision # Install torchvision
ARG TORCH_VISION_VERSION=v0.26.0 ARG TORCH_VISION_VERSION=v0.26.0
...@@ -133,7 +132,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -133,7 +132,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \ git clone --recursive https://github.com/numba/llvmlite.git -b v0.44.0 && \
git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \ git clone --recursive https://github.com/numba/numba.git -b ${NUMBA_VERSION} && \
cd llvm-project && mkdir build && cd build && \ cd llvm-project && mkdir build && cd build && \
uv pip install 'cmake<4' setuptools numpy && \ uv pip install 'cmake<4' 'setuptools<70' numpy && \
export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \ export PREFIX=/usr/local && CMAKE_ARGS="${CMAKE_ARGS} -DLLVM_ENABLE_PROJECTS=lld;libunwind;compiler-rt" \
CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \ CFLAGS="$(echo $CFLAGS | sed 's/-fno-plt //g')" \
CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \ CXXFLAGS="$(echo $CXXFLAGS | sed 's/-fno-plt //g')" \
...@@ -193,27 +192,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -193,27 +192,22 @@ RUN --mount=type=cache,target=/root/.cache/uv \
cd opencv-python && \ cd opencv-python && \
python -m build --wheel --installer=uv --outdir /tmp/opencv-python/dist python -m build --wheel --installer=uv --outdir /tmp/opencv-python/dist
# Build Outlines Core ## Todo(r3hankhan123): Remove guidance-builder stage once vLLM upgrades to new version of llguidance that fixes s390x issues. See https://github.com/guidance-ai/llguidance/issues/330
FROM python-install AS outlines-core-builder FROM python-install AS guidance-builder
WORKDIR /tmp WORKDIR /tmp
ENV CARGO_HOME=/root/.cargo ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH" ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
COPY requirements/common.txt /tmp/requirements/common.txt
ARG OUTLINES_CORE_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \ --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
--mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \ --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
OUTLINES_CORE_VERSION=${OUTLINES_CORE_VERSION:-$(grep -E '^outlines_core\s*==\s*[0-9.]+' /tmp/requirements/common.txt | grep -Eo '[0-9.]+')} && \ git clone https://github.com/guidance-ai/llguidance.git && \
if [ -z "${OUTLINES_CORE_VERSION}" ]; then echo "ERROR: Could not determine outlines_core version"; exit 1; fi && \ cd llguidance && \
git clone https://github.com/dottxt-ai/outlines-core.git && \ git checkout s390x-fix-v2 && \
cd outlines-core && \
git checkout tags/${OUTLINES_CORE_VERSION} && \
sed -i "s/version = \"0.0.0\"/version = \"${OUTLINES_CORE_VERSION}\"/" Cargo.toml && \
uv pip install maturin && \ uv pip install maturin && \
python -m maturin build --release --out dist python -m maturin build --release --out dist --compatibility linux
# Final build stage # # Final build stage
FROM python-install AS vllm-cpu FROM python-install AS vllm-cpu
ARG PYTHON_VERSION ARG PYTHON_VERSION
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
...@@ -229,10 +223,12 @@ ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/ ...@@ -229,10 +223,12 @@ ENV PKG_CONFIG_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64/pkgconfig:/usr/local/
ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH" ENV PATH="${VIRTUAL_ENV:+${VIRTUAL_ENV}/bin}:/opt/rh/gcc-toolset-14/root/usr/bin:/usr/local/bin:$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
# Force pure Python protobuf to avoid s390x C++ extension crashes
ENV PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
COPY . /workspace/vllm COPY . /workspace/vllm
WORKDIR /workspace/vllm WORKDIR /workspace/vllm
RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \ RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.19,target=/numactl \
make -C /numactl install make -C /numactl install
# Install dependencies, including PyTorch and Apache Arrow # Install dependencies, including PyTorch and Apache Arrow
...@@ -245,14 +241,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -245,14 +241,14 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \ --mount=type=bind,from=numba-builder,source=/tmp/llvmlite/dist,target=/tmp/llvmlite-wheels/ \
--mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \ --mount=type=bind,from=numba-builder,source=/tmp/numba/dist,target=/tmp/numba-wheels/ \
--mount=type=bind,from=opencv-builder,source=/tmp/opencv-python/dist,target=/tmp/opencv-wheels/ \ --mount=type=bind,from=opencv-builder,source=/tmp/opencv-python/dist,target=/tmp/opencv-wheels/ \
--mount=type=bind,from=outlines-core-builder,source=/tmp/outlines-core/dist,target=/tmp/outlines-core/dist/ \ --mount=type=bind,from=guidance-builder,source=/tmp/llguidance/dist,target=/tmp/guidance-wheels/ \
ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl) && \ ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/*.whl) && \
VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \ VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl) && \
HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \ HF_XET_WHL_FILE=$(ls /tmp/hf-xet-wheels/*.whl) && \
LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \ LLVM_WHL_FILE=$(ls /tmp/llvmlite-wheels/*.whl) && \
NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \ NUMBA_WHL_FILE=$(ls /tmp/numba-wheels/*.whl) && \
OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \ OPENCV_WHL_FILE=$(ls /tmp/opencv-wheels/*.whl) && \
OUTLINES_CORE_WHL_FILE=$(ls /tmp/outlines-core/dist/*.whl) && \ GUIDANCE_WHL_FILE=$(ls /tmp/guidance-wheels/*.whl) && \
uv pip install -v \ uv pip install -v \
$ARROW_WHL_FILE \ $ARROW_WHL_FILE \
$VISION_WHL_FILE \ $VISION_WHL_FILE \
...@@ -260,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -260,7 +256,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
$LLVM_WHL_FILE \ $LLVM_WHL_FILE \
$NUMBA_WHL_FILE \ $NUMBA_WHL_FILE \
$OPENCV_WHL_FILE \ $OPENCV_WHL_FILE \
$OUTLINES_CORE_WHL_FILE \ $GUIDANCE_WHL_FILE \
--index-strategy unsafe-best-match \ --index-strategy unsafe-best-match \
-r requirements/build/cpu.txt \ -r requirements/build/cpu.txt \
-r requirements/cpu.txt -r requirements/cpu.txt
...@@ -271,6 +267,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -271,6 +267,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \ VLLM_TARGET_DEVICE=cpu VLLM_CPU_MOE_PREPACK=0 python setup.py bdist_wheel && \
uv pip install "$(echo dist/*.whl)[tensorizer]" uv pip install "$(echo dist/*.whl)[tensorizer]"
# Remove protobuf C++ extension that crashes on s390x
RUN rm -rf /opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/_upb/*.so \
/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/google/protobuf/pyext/*.so 2>/dev/null || true
# setup non-root user for vllm # setup non-root user for vllm
RUN umask 002 && \ RUN umask 002 && \
/usr/sbin/useradd --uid 2000 --gid 0 vllm && \ /usr/sbin/useradd --uid 2000 --gid 0 vllm && \
......
...@@ -3,15 +3,15 @@ ...@@ -3,15 +3,15 @@
vLLM has experimental support for s390x architecture on IBM Z platform. For now, users must build from source to natively run on IBM Z platform. vLLM has experimental support for s390x architecture on IBM Z platform. For now, users must build from source to natively run on IBM Z platform.
Currently, the CPU implementation for s390x architecture supports FP32 datatype only. Currently, the CPU implementation for s390x architecture supports FP32, BF16 and FP16.
--8<-- [end:installation] --8<-- [end:installation]
--8<-- [start:requirements] --8<-- [start:requirements]
- OS: `Linux` - OS: `Linux`
- SDK: `gcc/g++ >= 12.3.0` or later with Command Line Tools - SDK: `gcc/g++ >= 14.0.0` or later with Command Line Tools
- Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above. - Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above.
- Build install python packages: `pyarrow`, `torch` and `torchvision` - Build install python packages: `torchvision`, `llvmlite`, `numba`, `pyarrow (for testing)`, `opencv-headless`
--8<-- [end:requirements] --8<-- [end:requirements]
--8<-- [start:set-up-using-python] --8<-- [start:set-up-using-python]
...@@ -24,13 +24,14 @@ Currently, there are no pre-built IBM Z CPU wheels. ...@@ -24,13 +24,14 @@ Currently, there are no pre-built IBM Z CPU wheels.
--8<-- [end:pre-built-wheels] --8<-- [end:pre-built-wheels]
--8<-- [start:build-wheel-from-source] --8<-- [start:build-wheel-from-source]
Install the following packages from the package manager before building the vLLM. For example on RHEL 9.4: Install the following packages from the package manager before building the vLLM. For example on RHEL 9.6:
```bash ```bash
dnf install -y \ dnf install -y \
which procps findutils tar vim git gcc g++ make patch make cython zlib-devel \ which procps findutils tar vim git gcc-toolset-14 gcc-toolset-14-binutils gcc-toolset-14-libatomic-devel zlib-devel \
libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \ libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
openssl-devel openblas openblas-devel wget autoconf automake libtool cmake numactl-devel openssl-devel openblas openblas-devel autoconf automake libtool cmake numpy libsndfile \
clang llvm-devel llvm-static clang-devel
``` ```
Install Rust >= 1.80, which is required to install the `outlines-core` and `uvloop` Python packages.
...@@ -43,13 +44,13 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \ ...@@ -43,13 +44,13 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
Execute the following commands to build and install vLLM from source. Execute the following commands to build and install vLLM from source.
!!! tip !!! tip
Please build the following dependencies, `torchvision`, `pyarrow` from source before building vLLM. Please build the following dependencies, `torchvision`, `llvmlite`, `numba`, `llguidance`, `pyarrow`, `opencv-headless` from source before building vLLM.
```bash ```bash
sed -i '/^torch/d' requirements/build/cuda.txt # remove torch from requirements/build/cuda.txt since we use nightly builds
uv pip install -v \ uv pip install -v \
--extra-index-url https://download.pytorch.org/whl/cpu \
--torch-backend auto \ --torch-backend auto \
-r requirements/build/cuda.txt \ -r requirements/build/cpu.txt \
-r requirements/cpu.txt \ -r requirements/cpu.txt \
VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \ VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
uv pip install dist/*.whl uv pip install dist/*.whl
...@@ -57,10 +58,9 @@ Execute the following commands to build and install vLLM from source. ...@@ -57,10 +58,9 @@ Execute the following commands to build and install vLLM from source.
??? console "pip" ??? console "pip"
```bash ```bash
sed -i '/^torch/d' requirements/build/cuda.txt # remove torch from requirements/build/cuda.txt since we use nightly builds
pip install -v \ pip install -v \
--extra-index-url https://download.pytorch.org/whl/nightly/cpu \ --extra-index-url https://download.pytorch.org/whl/cpu \
-r requirements/build/cuda.txt \ -r requirements/build/cpu.txt \
-r requirements/cpu.txt \ -r requirements/cpu.txt \
VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \ VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
pip install dist/*.whl pip install dist/*.whl
......
...@@ -19,7 +19,7 @@ pillow # Required for image processing ...@@ -19,7 +19,7 @@ pillow # Required for image processing
prometheus-fastapi-instrumentator >= 7.0.0 prometheus-fastapi-instrumentator >= 7.0.0
tiktoken >= 0.6.0 # Required for DBRX tokenizer tiktoken >= 0.6.0 # Required for DBRX tokenizer
lm-format-enforcer == 0.11.3 lm-format-enforcer == 0.11.3
llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "s390x" or platform_machine == "ppc64le" llguidance >= 1.3.0, < 1.4.0; platform_machine == "x86_64" or platform_machine == "arm64" or platform_machine == "aarch64" or platform_machine == "ppc64le"
outlines_core == 0.2.11 outlines_core == 0.2.11
# required for outlines backend disk cache # required for outlines backend disk cache
diskcache == 5.6.3 diskcache == 5.6.3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment