Unverified Commit 2ecbd8b8 authored by li chaoran, committed by GitHub

[feat] add ascend readme and docker release (#8700)


Signed-off-by: mywaaagh_admin <pkwarcraft@gmail.com>
Signed-off-by: lichaoran <pkwarcraft@gmail.com>
Co-authored-by: Even Zhou <even.y.zhou@outlook.com>
Co-authored-by: ronnie_zheng <zl19940307@163.com>
parent 305b27c1
@@ -27,13 +27,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-1
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}
bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
@@ -56,13 +62,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-2
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}
bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
@@ -85,13 +97,19 @@ jobs:
github.event.pull_request.draft == false
runs-on: linux-arm64-npu-4
container:
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1.alpha003-910b-ubuntu22.04-py3.11
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install dependencies
run: |
# speed up by using infra cache services
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}
bash scripts/ci/npu_ci_install_dependency.sh
# copy required file from our daily cache
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
name: Release Docker Images Nightly (Ascend NPU)
on:
pull_request:
branches:
- main
paths:
- ".github/workflows/release-docker-npu-nightly.yaml"
workflow_dispatch:
schedule:
- cron: "0 0 * * *"
concurrency:
group: ${{ github.workflow }}-${{ github.sha }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-22.04-arm
strategy:
matrix:
cann_version: ["8.2.rc1"]
device_type: ["a3"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
- name: Setup Docker buildx
uses: docker/setup-buildx-action@v3
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ github.repository_owner }}/sglang
# push with schedule event
# push with workflow_dispatch event
tags: |
type=ref,event=pr
type=ref,event=branch
type=schedule,pattern=main
flavor: |
latest=false
suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Log into docker hub
uses: docker/login-action@v3
if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Build and push Docker image with Buildx (don't push on PR)
# https://github.com/docker/build-push-action
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v6
with:
context: docker
file: docker/Dockerfile.npu
# TODO: add x86 platform support when memfabric is ready
platforms: linux/arm64
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
provenance: false
build-args: |
CANN_VERSION=${{ matrix.cann_version }}
DEVICE_TYPE=${{ matrix.device_type }}
name: Release Docker Images (Ascend NPU)
on:
push:
tags:
- "*" # Trigger on all tags and filterred by pep440 later
workflow_dispatch:
pull_request:
branches:
- main
paths:
- ".github/workflows/release-docker-npu.yaml"
jobs:
build:
runs-on: ubuntu-22.04-arm
strategy:
matrix:
cann_version: ["8.2.rc1"]
device_type: ["a3"]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Free up disk space
uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
with:
tool-cache: true
docker-images: false
# push with tag
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: |
${{ github.repository_owner }}/sglang
tags: |
type=ref,event=pr
type=ref,event=tag,suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }}
flavor: |
latest=false
- name: Setup Docker buildx
uses: docker/setup-buildx-action@v3
# Login against a Docker registry except on PR
# https://github.com/docker/login-action
- name: Login to Docker Hub
uses: docker/login-action@v3
if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Get version
id: get_version
run: |
version=$(cat python/sglang/version.py | cut -d'"' -f2)
echo "TAG=${{ github.repository_owner }}/sglang:v$version-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> $GITHUB_OUTPUT
kernel_tag=$(curl -s https://api.github.com/repos/sgl-project/sgl-kernel-npu/tags | jq -r '.[0].name')
echo "KERNEL_NPU_TAG=${kernel_tag}" >> $GITHUB_OUTPUT
- name: Build and push Docker image
id: build-and-push
uses: docker/build-push-action@v6
with:
context: docker
file: docker/Dockerfile.npu
# TODO: add x86 platform support when memfabric is ready
platforms: linux/arm64
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags || steps.get_version.outputs.TAG }}
push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
provenance: false
build-args: |
SGLANG_KERNEL_NPU_TAG=${{ steps.get_version.outputs.KERNEL_NPU_TAG }}
CANN_VERSION=${{ matrix.cann_version }}
DEVICE_TYPE=${{ matrix.device_type }}
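Taken together, the nightly and release workflows publish image tags of the following shapes; the exact namespace comes from `github.repository_owner`, so the names below are illustrative only:
```shell
# Nightly (schedule on main):  <owner>/sglang:main-cann8.2.rc1-a3
# Release (git tag vX.Y.Z):    <owner>/sglang:vX.Y.Z-cann8.2.rc1-a3
docker pull <owner>/sglang:main-cann8.2.rc1-a3  # illustrative pull command
```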
ARG CANN_VERSION=8.2.rc1
ARG DEVICE_TYPE=a3
ARG OS=ubuntu22.04
ARG PYTHON_VERSION=py3.11
FROM quay.io/ascend/cann:$CANN_VERSION-$DEVICE_TYPE-$OS-$PYTHON_VERSION
# Update pip & apt sources
ARG PIP_INDEX_URL="https://pypi.org/simple/"
ARG APTMIRROR=""
ARG MEMFABRIC_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl
ARG PYTORCH_VERSION=2.6.0
ARG TORCHVISION_VERSION=0.21.0
ARG PTA_URL="https://gitee.com/ascend/pytorch/releases/download/v7.1.0.1-pytorch2.6.0/torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl"
ARG VLLM_TAG=v0.8.5
ARG TRITON_ASCEND_URL=https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl
ARG SGLANG_TAG=main
ARG ASCEND_CANN_PATH=/usr/local/Ascend/ascend-toolkit
ARG SGLANG_KERNEL_NPU_TAG=main
WORKDIR /workspace
# Define environments
ENV DEBIAN_FRONTEND=noninteractive
RUN pip config set global.index-url $PIP_INDEX_URL
RUN if [ -n "$APTMIRROR" ];then sed -i "s|.*.ubuntu.com|$APTMIRROR|g" /etc/apt/sources.list ;fi
# Install development tools and utilities
RUN apt-get update -y && apt-get upgrade -y && apt-get install -y \
build-essential \
cmake \
vim \
wget \
curl \
net-tools \
zlib1g-dev \
lld \
clang \
locales \
ccache \
ca-certificates \
&& rm -rf /var/cache/apt/* \
&& rm -rf /var/lib/apt/lists/* \
&& update-ca-certificates \
&& locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
# Install dependencies
# TODO: install from pypi released memfabric
# TODO: install from pypi released triton-ascend
RUN pip install $MEMFABRIC_URL --no-cache-dir \
&& pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu --no-cache-dir \
&& wget ${PTA_URL} && pip install "./torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl" --no-cache-dir \
&& pip install ${TRITON_ASCEND_URL} --no-cache-dir \
&& python3 -m pip install --no-cache-dir numpy==1.26.4 pybind11
# Install vLLM
RUN git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG && \
cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v . --no-cache-dir && \
cd .. && rm -rf vllm
# Install SGLang
RUN git clone https://github.com/sgl-project/sglang --branch $SGLANG_TAG && \
cd ./sglang/python && pip install .[srt_npu] --no-cache-dir && \
cd .. && rm -rf ./sglang
# Install DeepEP (from sgl-kernel-npu)
RUN git clone --branch $SGLANG_KERNEL_NPU_TAG https://github.com/sgl-project/sgl-kernel-npu.git \
&& export LD_LIBRARY_PATH=${ASCEND_CANN_PATH}/latest/runtime/lib64/stub:$LD_LIBRARY_PATH && \
source ${ASCEND_CANN_PATH}/set_env.sh && \
cd sgl-kernel-npu && \
bash build.sh \
&& pip install output/deep_ep*.whl --no-cache-dir \
&& cd .. && rm -rf sgl-kernel-npu \
&& cd "$(pip show deep-ep | awk '/^Location:/ {print $2}')" && ln -s deep_ep/deep_ep_cpp*.so
CMD ["/bin/bash"]
@@ -24,6 +24,7 @@ To run DeepSeek V3/R1 models, the requirements are as follows:
| **Quantized weights (int8)** | 16 x A100/800 |
| | 32 x L40S |
| | Xeon 6980P CPU |
| | 2 x Atlas 800I A3 |
<style>
.md-typeset__table {
@@ -64,6 +65,7 @@ Detailed commands for reference:
- [16 x A100 (int8)](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3#example-serving-with-16-a100a800-with-int8-quantization)
- [32 x L40S (int8)](https://github.com/sgl-project/sglang/tree/main/benchmark/deepseek_v3#example-serving-with-32-l40s-with-int8-quantization)
- [Xeon 6980P CPU](../platforms/cpu_server.md#example-running-deepseek-r1)
- [2 x Atlas 800I A3 (int8)](../platforms/ascend_npu.md#running-deepseek-v3)
### Download Weights
If you encounter errors when starting the server, ensure the weights have finished downloading. It's recommended to download them beforehand, or to restart the server multiple times until all weights are downloaded. Please refer to the official [DeepSeek V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base#61-inference-with-deepseek-infer-demo-example-only) guide to download the weights.
# Ascend NPUs
# SGLang on Ascend NPUs
## Install
TODO
You can install SGLang using any of the methods below. Please go through the `System Settings` section first to make sure your cluster runs at maximum performance. Feel free to open an issue [here at sglang](https://github.com/sgl-project/sglang/issues) if you run into any problems.
## System Settings
### CPU performance power scheme
The default power scheme on Ascend hardware is `ondemand`, which can hurt performance; changing it to `performance` is recommended.
```shell
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# Make sure changes are applied successfully
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor # shows performance
```
### Disable NUMA balancing
```shell
sudo sysctl -w kernel.numa_balancing=0
# Check
cat /proc/sys/kernel/numa_balancing # shows 0
```
### Prevent swapping out system memory
```shell
sudo sysctl -w vm.swappiness=10
# Check
cat /proc/sys/vm/swappiness # shows 10
```
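Both `sysctl` changes above are lost on reboot. A minimal sketch to persist them, assuming your distribution reads drop-in files from `/etc/sysctl.d`:
```shell
# Persist the NUMA-balancing and swappiness settings across reboots.
cat <<'EOF' | sudo tee /etc/sysctl.d/99-sglang-npu.conf
kernel.numa_balancing = 0
vm.swappiness = 10
EOF
sudo sysctl --system  # reload all sysctl configuration files
```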
## Installing SGLang
### Method 1: Installing from source with prerequisites
#### Python Version
Only `python==3.11` is currently supported. To avoid breaking the system-installed Python, consider creating an isolated environment with [conda](https://github.com/conda/conda).
```shell
conda create --name sglang_npu python=3.11
conda activate sglang_npu
```
#### MemFabric Adaptor
_TODO: MemFabric is still a work in progress and will not be open sourced until August/September 2025. Until then, we release it as a prebuilt wheel package._
_Notice: The prebuilt wheel targets `aarch64` only; please open an issue [here at sglang](https://github.com/sgl-project/sglang/issues) to let us know if you need an `amd64` build._
MemFabric Adaptor is a drop-in replacement for the Mooncake Transfer Engine that enables KV cache transfer on Ascend NPU clusters.
```shell
MF_WHL_NAME="mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/${MF_WHL_NAME}"
wget -O "${MF_WHL_NAME}" "${MEMFABRIC_URL}" && pip install "./${MF_WHL_NAME}"
```
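To confirm the installation, query pip for the distribution; the name `mf_adapter` comes from the wheel filename above:
```shell
pip show mf_adapter  # should report version 1.0.0 and the install location
```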
#### PyTorch and the PyTorch Framework Adaptor on Ascend
Only `torch==2.6.0` is currently supported due to limitations in NPU graph mode and Triton-on-Ascend; a more general version will be released by the end of September 2025.
```shell
PYTORCH_VERSION=2.6.0
TORCHVISION_VERSION=0.21.0
pip install torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu
PTA_VERSION="v7.1.0.1-pytorch2.6.0"
PTA_NAME="torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl"
PTA_URL="https://gitee.com/ascend/pytorch/releases/download/${PTA_VERSION}/${PTA_NAME}"
wget -O "${PTA_NAME}" "${PTA_URL}" && pip install "./${PTA_NAME}"
```
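A quick sanity check that the adaptor loads and can see a device, assuming the Ascend driver and CANN toolkit are already installed and sourced on the host:
```shell
# Importing torch_npu registers the NPU device backend with torch.
python3 -c "import torch, torch_npu; print(torch.npu.is_available())"
```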
#### vLLM
vLLM is still a required dependency on Ascend NPU. Because of the `torch==2.6.0` constraint, only vLLM v0.8.5 is supported.
```shell
VLLM_TAG=v0.8.5
git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG
(cd vllm && VLLM_TARGET_DEVICE="empty" pip install -v -e .)
```
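Because vLLM is installed with `VLLM_TARGET_DEVICE="empty"`, no device-specific kernels are built; a quick import check is enough to verify it:
```shell
python3 -c "import vllm; print(vllm.__version__)"  # expect 0.8.5
```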
#### Triton on Ascend
_Notice:_ We recommend installing `triton-ascend` from source due to its rapid development; the version on PyPI cannot keep up for now. This should be resolved in September 2025, after which `pip install` will be the only installation method.
Please follow Triton-on-Ascend's [installation guide from source](https://gitee.com/ascend/triton-ascend#2%E6%BA%90%E4%BB%A3%E7%A0%81%E5%AE%89%E8%A3%85-triton-ascend) to install the latest `triton-ascend` package.
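After building from source, you can verify the install; this assumes `triton-ascend` exposes the standard `triton` module, as upstream Triton does:
```shell
python3 -c "import triton; print(triton.__version__)"
```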
#### DeepEP-compatible Library
We also provide a DeepEP-compatible library as a drop-in replacement for deepseek-ai's DeepEP; see the [installation guide](https://github.com/sgl-project/sgl-kernel-npu/blob/main/python/deep_ep/README.md).
#### Installing SGLang from source
```shell
# Use the latest release branch
git clone -b v0.5.0rc0 https://github.com/sgl-project/sglang.git
cd sglang
pip install --upgrade pip
pip install -e python[srt_npu]
```
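A quick import check confirms the editable install:
```shell
python3 -c "import sglang; print(sglang.__version__)"
```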
### Method 2: Using docker
__Notice:__ `--privileged` and `--network=host` are required for RDMA, which is typically needed in Ascend NPU clusters.
__Notice:__ The following docker commands are written for Atlas 800I A3 machines. If you are using Atlas 800I A2, make sure only `davinci[0-7]` are mapped into the container.
```shell
# Clone the SGLang repository
git clone https://github.com/sgl-project/sglang.git
cd sglang/docker
# Build the docker image
docker build -t sglang-npu:main -f Dockerfile.npu .
alias drun='docker run -it --rm --privileged --network=host --ipc=host --shm-size=16g \
--device=/dev/davinci0 --device=/dev/davinci1 --device=/dev/davinci2 --device=/dev/davinci3 \
--device=/dev/davinci4 --device=/dev/davinci5 --device=/dev/davinci6 --device=/dev/davinci7 \
--device=/dev/davinci8 --device=/dev/davinci9 --device=/dev/davinci10 --device=/dev/davinci11 \
--device=/dev/davinci12 --device=/dev/davinci13 --device=/dev/davinci14 --device=/dev/davinci15 \
--device=/dev/davinci_manager --device=/dev/hisi_hdc \
--volume /usr/local/sbin:/usr/local/sbin --volume /usr/local/Ascend/driver:/usr/local/Ascend/driver \
--volume /usr/local/Ascend/firmware:/usr/local/Ascend/firmware \
--volume /etc/ascend_install.info:/etc/ascend_install.info \
--volume /var/queue_schedule:/var/queue_schedule --volume ~/.cache/:/root/.cache/'
drun --env "HF_TOKEN=<secret>" \
sglang-npu:main \
python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --attention-backend ascend --host 0.0.0.0 --port 30000
```
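Once the server is up, you can verify it from the host with SGLang's standard HTTP endpoints:
```shell
# Liveness probe
curl http://127.0.0.1:30000/health
# Minimal generation request
curl -s http://127.0.0.1:30000/generate \
  -H "Content-Type: application/json" \
  -d '{"text": "Hello, my name is", "sampling_params": {"max_new_tokens": 16}}'
```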
## Examples
TODO
### Running DeepSeek-V3
This example runs DeepSeek with PD (prefill/decode) disaggregation on 2 x Atlas 800I A3 machines.
Model weights can be found [here](https://modelers.cn/models/State_Cloud/Deepseek-R1-bf16-hfd-w8a8).
Prefill:
```shell
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export ASCEND_MF_STORE_URL="tcp://<PREFILL_HOST_IP>:<PORT>"
drun sglang-npu:main \
python3 -m sglang.launch_server --model-path State_Cloud/DeepSeek-R1-bf16-hfd-w8a8 \
--trust-remote-code \
--attention-backend ascend \
--mem-fraction-static 0.8 \
--quantization w8a8_int8 \
--tp-size 16 \
--dp-size 1 \
--nnodes 1 \
--node-rank 0 \
--disaggregation-mode prefill \
--disaggregation-bootstrap-port 6657 \
--disaggregation-transfer-backend ascend \
--dist-init-addr <PREFILL_HOST_IP>:6688 \
--host <PREFILL_HOST_IP> \
--port 8000
```
Decode:
```shell
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export ASCEND_MF_STORE_URL="tcp://<PREFILL_HOST_IP>:<PORT>"
export HCCL_BUFFSIZE=200
export SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=24
drun sglang-npu:main \
python3 -m sglang.launch_server --model-path State_Cloud/DeepSeek-R1-bf16-hfd-w8a8 \
--trust-remote-code \
--attention-backend ascend \
--mem-fraction-static 0.8 \
--quantization w8a8_int8 \
--enable-deepep-moe \
--deepep-mode low_latency \
--tp-size 16 \
--dp-size 1 \
--ep-size 16 \
--nnodes 1 \
--node-rank 0 \
--disaggregation-mode decode \
--disaggregation-transfer-backend ascend \
--dist-init-addr <DECODE_HOST_IP>:6688 \
--host <DECODE_HOST_IP> \
--port 8001
```
Mini_LB:
```shell
drun sglang-npu:main \
python -m sglang.srt.disaggregation.launch_lb \
--prefill http://<PREFILL_HOST_IP>:8000 \
--decode http://<DECODE_HOST_IP>:8001 \
--host 127.0.0.1 --port 5000
```
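With all three components running, requests sent to the Mini LB are routed across the prefill and decode instances; a minimal end-to-end check using the same `/generate` API as a regular SGLang server:
```shell
curl -s http://127.0.0.1:5000/generate \
  -H "Content-Type: application/json" \
  -d '{"text": "The capital of France is", "sampling_params": {"max_new_tokens": 32}}'
```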
#!/bin/bash
set -euo pipefail
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
PIP_INSTALL="pip install --no-cache-dir"
# Update apt & pip sources
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
pip config set global.index-url http://${CACHING_URL}/pypi/simple
pip config set global.trusted-host ${CACHING_URL}
# Install the required dependencies in CI.
apt update -y && apt install -y \
build-essential \
@@ -31,7 +24,7 @@ python3 -m ${PIP_INSTALL} --upgrade pip
### Download MemFabricV2
MF_WHL_NAME="mf_adapter-1.0.0-cp311-cp311-linux_aarch64.whl"
MEMFABRIC_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/${MF_WHL_NAME}"
wget "${MEMFABRIC_URL}" && ${PIP_INSTALL} "./${MF_WHL_NAME}"
wget -O "${MF_WHL_NAME}" "${MEMFABRIC_URL}" && ${PIP_INSTALL} "./${MF_WHL_NAME}"
### Install vLLM
@@ -43,16 +36,19 @@ git clone --depth 1 https://github.com/vllm-project/vllm.git --branch $VLLM_TAG
### Install PyTorch and PTA
PYTORCH_VERSION=2.6.0
TORCHVISION_VERSION=0.21.0
PTA_VERSION=2.6.0
${PIP_INSTALL} torch==$PYTORCH_VERSION torchvision==$TORCHVISION_VERSION --index-url https://download.pytorch.org/whl/cpu
${PIP_INSTALL} torch_npu==$PTA_VERSION
PTA_VERSION="v7.1.0.1-pytorch2.6.0"
PTA_NAME="torch_npu-2.6.0.post1-cp311-cp311-manylinux_2_28_aarch64.whl"
PTA_URL="https://gitee.com/ascend/pytorch/releases/download/${PTA_VERSION}/${PTA_NAME}"
wget -O "${PTA_NAME}" "${PTA_URL}" && ${PIP_INSTALL} "./${PTA_NAME}"
### Install Triton-Ascend
TRITON_ASCEND_NAME="triton_ascend-3.2.0.dev20250729-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl"
TRITON_ASCEND_URL="https://sglang-ascend.obs.cn-east-3.myhuaweicloud.com/sglang/${TRITON_ASCEND_NAME}"
${PIP_INSTALL} attrs==24.2.0 numpy==1.26.4 scipy==1.13.1 decorator==5.1.1 psutil==6.0.0 pytest==8.3.2 pytest-xdist==3.6.1 pyyaml pybind11
wget "${TRITON_ASCEND_URL}" && ${PIP_INSTALL} "./${TRITON_ASCEND_NAME}"
wget -O "${TRITON_ASCEND_NAME}" "${TRITON_ASCEND_URL}" && ${PIP_INSTALL} "./${TRITON_ASCEND_NAME}"
### Install SGLang