# Nightly build of the AMD (ROCm) SGLang development images.
#
# Jobs:
#   prepare - computes the date stamp once for the whole run.
#   publish - builds and pushes one image per (gpu_arch, build_type) pair.
#   cache   - pulls the freshly pushed "all" images and stores them as
#             tarballs under /home/runner/sgl-data/docker on the GPU runner.
name: Release Docker Images Nightly (AMD)

on:
  workflow_dispatch:
  schedule:
    # Every day at 13:00 UTC.
    - cron: '0 13 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  # NOTE: this workflow has no pull_request trigger, so in practice the group
  # is keyed on the commit SHA.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  # Compute the date stamp exactly once. Every tag pushed by `publish` and the
  # tag pulled by `cache` then agree, even if jobs start on different days or
  # on runners configured with different time zones.
  prepare:
    if: github.repository == 'sgl-project/sglang'
    # Reuses the publish runner label; the step only needs bash and date.
    runs-on: amd-docker-scale
    outputs:
      date: ${{ steps.stamp.outputs.date }}
    steps:
      - name: Set Date
        id: stamp
        run: |
          echo "date=$(date +%Y%m%d)" >> "$GITHUB_OUTPUT"

  publish:
    if: github.repository == 'sgl-project/sglang'
    needs: prepare
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      matrix:
        gpu_arch: ['gfx942', 'gfx942-rocm700', 'gfx950']
        build_type: ['all', 'srt']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push
        # Expressions are passed through the environment rather than being
        # pasted into the script text, so the shell sees them as plain data.
        env:
          DATE: ${{ needs.prepare.outputs.date }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          BUILD_TYPE: ${{ matrix.build_type }}
        run: |
          # Same strict mode as the cache job: abort on any failing command,
          # unset variable, or failing pipeline stage.
          set -euxo pipefail

          # Second double-quote-delimited field of version.py (presumably the
          # version string inside the quotes - verify against that file).
          version=$(cut -d'"' -f2 python/sglang/version.py)
          if [ -z "${version}" ]; then
            echo "Could not read version from python/sglang/version.py"
            exit 1
          fi
          if [ -z "${DATE}" ]; then
            echo "DATE is empty"
            exit 1
          fi

          # Map the matrix GPU architecture to the ROCm part of the tag.
          case "${GPU_ARCH}" in
            gfx942)
              rocm_tag="rocm630-mi30x"
              ;;
            gfx942-rocm700)
              rocm_tag="rocm700-mi30x"
              ;;
            gfx950)
              rocm_tag="rocm700-mi35x"
              ;;
            *)
              echo "Unsupported gfx arch"
              exit 1
              ;;
          esac

          # "all" images carry no suffix; "srt" images are tagged "-srt".
          case "${BUILD_TYPE}" in
            all)
              tag_suffix=""
              ;;
            srt)
              tag_suffix="-srt"
              ;;
            *)
              echo "Unsupported build type"
              exit 1
              ;;
          esac

          tag="v${version}-${rocm_tag}"
          image="rocm/sgl-dev:${tag}-${DATE}${tag_suffix}"

          docker build . \
            -f docker/Dockerfile.rocm \
            --build-arg BUILD_TYPE="${BUILD_TYPE}" \
            --build-arg GPU_ARCH="${GPU_ARCH}" \
            -t "${image}" \
            --no-cache
          docker push "${image}"

  cache:
    if: github.repository == 'sgl-project/sglang'
    runs-on: linux-mi300-gpu-1
    environment: 'prod'
    # `prepare` is listed explicitly because the `needs` context only exposes
    # outputs of direct dependencies.
    needs: [prepare, publish]
    strategy:
      matrix:
        gpu_arch: ['gfx942', 'gfx942-rocm700']
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Pull and Save Docker Image to Cache
        env:
          DATE: ${{ needs.prepare.outputs.date }}
          GPU_ARCH: ${{ matrix.gpu_arch }}
          BUILD_TYPE: ${{ matrix.build_type }}
        run: |
          set -euxo pipefail

          # Second double-quote-delimited field of version.py (presumably the
          # version string inside the quotes - verify against that file).
          version=$(cut -d'"' -f2 python/sglang/version.py)
          if [ -z "${version}" ]; then
            echo "Could not read version from python/sglang/version.py"
            exit 1
          fi
          if [ -z "${DATE}" ]; then
            echo "DATE is empty"
            exit 1
          fi

          # Only the MI30x variants are cached by this job.
          case "${GPU_ARCH}" in
            gfx942)
              rocm_tag="rocm630-mi30x"
              ;;
            gfx942-rocm700)
              rocm_tag="rocm700-mi30x"
              ;;
            *)
              echo "Unsupported gfx arch"
              exit 1
              ;;
          esac

          # Only the "all" build type is cached; it has no tag suffix.
          case "${BUILD_TYPE}" in
            all)
              tag_suffix=""
              ;;
            *)
              echo "Unsupported build type"
              exit 1
              ;;
          esac

          tag="v${version}-${rocm_tag}"
          image="rocm/sgl-dev:${tag}-${DATE}${tag_suffix}"

          # Determine target cache file name based on ROCm variant
          case "${rocm_tag}" in
            rocm630*)
              final_path="/home/runner/sgl-data/docker/image.tar"
              ;;
            rocm700*)
              final_path="/home/runner/sgl-data/docker/image-700.tar"
              ;;
            *)
              echo "Unexpected ROCm tag: ${rocm_tag}"
              exit 1
              ;;
          esac

          # Save to a temp file first and rename afterwards, so the final
          # path is only replaced once `docker save` has succeeded.
          tmp_path="${final_path}.tmp"

          echo "Pulling image: ${image}"
          docker pull "${image}"

          echo "Saving to temp file: ${tmp_path}"
          docker save "${image}" -o "${tmp_path}"

          echo "Moving to final path: ${final_path}"
          mv -f "${tmp_path}" "${final_path}"

          echo "Cache populated successfully at ${final_path}"