[Bugfix] Change vLLM install order & Add A2 support (#9232)

fda762a2 · Even Zhou · GitHub · 1df84ff4 · fda762a2 · fda762a2
Unverified commit fda762a2, authored Aug 17, 2025 by Even Zhou; committed by GitHub Aug 16, 2025
3 changed files
--- a/.github/workflows/release-docker-npu-nightly.yaml
+++ b/.github/workflows/release-docker-npu-nightly.yaml
@@ -4,7 +4,7 @@ on:
    branches:
      - main
    paths:
-      - ".github/workflows/release-docker-npu-nightly.yaml"
+      - ".github/workflows/release-docker-npu-nightly.yml"
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"
@@ -19,7 +19,7 @@ jobs:
    strategy:
      matrix:
        cann_version: ["8.2.rc1"]
-        device_type: ["a3"]
+        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

--- a/.github/workflows/release-docker-npu.yaml
+++ b/.github/workflows/release-docker-npu.yaml
@@ -8,7 +8,7 @@ on:
    branches:
      - main
    paths:
-      - ".github/workflows/release-docker-npu.yaml"
+      - ".github/workflows/release-docker-npu.yml"
 jobs:
  build:
@@ -16,7 +16,7 @@ jobs:
    strategy:
      matrix:
        cann_version: ["8.2.rc1"]
-        device_type: ["a3"]
+        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

--- a/docker/Dockerfile.npu
+++ b/docker/Dockerfile.npu
@@ -51,22 +51,21 @@ ENV LC_ALL=en_US.UTF-8
 # Install dependencies
 # TODO: install from pypi released memfabric
-# TODO: install from pypi released triton-ascend
+RUN pip install $MEMFABRIC_URL --no-cache-dir
-RUN pip install $MEMFABRIC_URL --no-cache-dir \
-    && pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
-    && wget ${PTA_URL} && pip install "./torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl" --no-cache-dir \
-    && pip install ${TRITON_ASCEND_URL} --no-cache-dir \
-    && python3 -m pip install --no-cache-dir numpy==1.26.4 pybind11
 # Install vLLM
 RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \
-    cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir && \
+    (cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir) && rm -rf vllm
-    cd .. && rm -rf vllm
+# TODO: install from pypi released triton-ascend
+RUN pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
+    && wget ${PTA_URL} && pip install "./torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl" --no-cache-dir \
+    && python3 -m pip install --no-cache-dir attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil==6.0.0 pytest==8.3.2 pytest-xdist==3.6.1 pyyaml pybind11 \
+    && pip install ${TRITON_ASCEND_URL} --no-cache-dir
 # Install SGLang
 RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
-    cd ./sglang/python && pip install .[srt_npu] --no-cache-dir && \
+    (cd sglang/python && pip install -v .[srt_npu] --no-cache-dir) && rm -rf sglang
-    cd .. && rm -rf ./sglang
 # Install Deep-ep
 RUN git clone  --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \