# Composite action: deploys a DynamoGraphDeployment (DGD) into a Kubernetes
# namespace, runs the pytest deployment test against it, and cleans up.
#
# Flow:
#   1. Check whether the requested namespace already exists.
#      - If it does, reuse it.
#      - If it does not (e.g. on a job rerun), derive a per-framework/profile
#        namespace and bootstrap it via setup-deploy-namespace.
#   2. Write the kubeconfig, install the test dependencies and run pytest.
#   3. Always dump diagnostics, delete the DGD if it is still present, tear
#      down a self-bootstrapped namespace, and upload the JUnit report.
#
# All inputs are handed to shell scripts through `env:` rather than being
# interpolated into the script text, so their content cannot alter the script.
name: 'Dynamo Graph Deployment Test'
description: 'Deploy a DynamoGraphDeployment to Kubernetes, validate it serves requests, and cleanup'

inputs:
  # Kubernetes Configuration
  kubeconfig_base64:
    description: 'Base64-encoded kubeconfig for cluster access'
    required: true
  namespace:
    description: 'Kubernetes namespace for deployment'
    required: true
  registry:
    description: 'Container registry hostname. Required for rerun self-bootstrap.'
    required: true
  operator_tag:
    description: 'Operator image tag (default: main-operator). Required for rerun self-bootstrap.'
    required: false
    default: 'main-operator'
  hf_token:
    description: 'HuggingFace token for model access'
    required: false
    default: ''
  framework:
    description: 'Framework name (vllm, sglang, trtllm)'
    required: true
  profile:
    description: 'Deployment profile (e.g., disagg_router, agg)'
    required: true
  image:
    description: 'Full container image reference for the framework runtime'
    required: true
  platform_arch:
    description: 'Platform architecture (amd64, arm64)'
    required: false
    default: 'amd64'

runs:
  using: 'composite'
  steps:
    # Outputs:
    #   exists - 'true' when the requested namespace is already present
    #   ns     - namespace every later step must use (requested or derived)
    - name: Check if namespace exists
      id: ns-check
      shell: bash
      env:
        KUBECONFIG_B64: ${{ inputs.kubeconfig_base64 }}
        NAMESPACE: ${{ inputs.namespace }}
        FRAMEWORK: ${{ inputs.framework }}
        PROFILE: ${{ inputs.profile }}
        # Step-scoped kubeconfig; deliberately separate from the .kubeconfig
        # file written by the "Setup Kubeconfig" step.
        KUBECONFIG: ${{ github.workspace }}/.kubeconfig_check
      run: |
        echo "::group::Check if namespace exists"
        # Remove the temporary credentials file even if a command below fails.
        trap 'rm -f "${KUBECONFIG}"' EXIT
        echo "${KUBECONFIG_B64}" | base64 -d > "${KUBECONFIG}"
        chmod 600 "${KUBECONFIG}"

        # `-o name` prints "namespace/<name>" when it exists and nothing when
        # it does not. `|| true` keeps the previous behaviour of treating a
        # kubectl failure as "not found" instead of aborting the step.
        FOUND_NS="$(kubectl get namespace "${NAMESPACE}" --ignore-not-found -o name || true)"

        if [ -n "${FOUND_NS}" ]; then
          echo "exists=true" >> "$GITHUB_OUTPUT"
          echo "ns=${NAMESPACE}" >> "$GITHUB_OUTPUT"
          echo "Namespace $NAMESPACE exists, will reuse it"
        else
          echo "exists=false" >> "$GITHUB_OUTPUT"
          # Generate a unique namespace for this framework+profile to avoid
          # collisions on parallel reruns.
          # Replace underscores with hyphens for k8s naming compliance.
          PROFILE_SANITIZED="${PROFILE//_/-}"
          SELF_NS="${NAMESPACE}-${FRAMEWORK}-${PROFILE_SANITIZED}"
          # TODO: Improve this truncation logic. The operator creates k8s
          # labels as "{namespace}-{deployment_name}", which restricts the
          # namespace to a max length of 44 chars (largest deployment name is
          # "vllm-disagg-router", 18 chars).
          SELF_NS="${SELF_NS:0:44}"
          # Remove trailing dash from truncation
          SELF_NS="${SELF_NS%-}"
          echo "ns=${SELF_NS}" >> "$GITHUB_OUTPUT"
          echo "Namespace $NAMESPACE not found, will self-bootstrap as ${SELF_NS}"
        fi
        echo "::endgroup::"

    # Only runs when the namespace was not found above.
    - name: Setup namespace (self-bootstrap on rerun)
      if: steps.ns-check.outputs.exists != 'true'
      uses: ./.github/actions/setup-deploy-namespace
      with:
        kubeconfig_base64: ${{ inputs.kubeconfig_base64 }}
        namespace: ${{ steps.ns-check.outputs.ns }}
        registry: ${{ inputs.registry }}
        operator_tag: ${{ inputs.operator_tag }}
        hf_token: ${{ inputs.hf_token }}

    # Writes the kubeconfig used by the remaining steps and exports KUBECONFIG
    # to the job environment.
    - name: Setup Kubeconfig
      id: setup-kubeconfig
      shell: bash
      env:
        # Passed via env (not inlined into the script) so the secret's content
        # is never parsed as shell code.
        KUBECONFIG_B64: ${{ inputs.kubeconfig_base64 }}
        NAMESPACE: ${{ steps.ns-check.outputs.ns }}
        KUBECONFIG_PATH: ${{ github.workspace }}/.kubeconfig
      run: |
        echo "${KUBECONFIG_B64}" | base64 -d > "${KUBECONFIG_PATH}"
        chmod 600 "${KUBECONFIG_PATH}"
        echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "$GITHUB_ENV"
        export KUBECONFIG="${KUBECONFIG_PATH}"
        kubectl config set-context --current --namespace="${NAMESPACE}"
        kubectl config get-contexts

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'
        cache: 'pip'
        cache-dependency-path: 'container/deps/requirements.test.txt'

    - name: Install test dependencies
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install -r container/deps/requirements.test.txt

    # Runs the deployment test and writes a JUnit report whose file name must
    # match the `path` of the "Upload Test Results" step.
    - name: Deploy and Test
      id: deploy
      shell: bash
      env:
        KUBECONFIG: ${{ github.workspace }}/.kubeconfig
        NAMESPACE: ${{ steps.ns-check.outputs.ns }}
        FRAMEWORK: ${{ inputs.framework }}
        PROFILE: ${{ inputs.profile }}
        IMAGE: ${{ inputs.image }}
        PLATFORM_ARCH: ${{ inputs.platform_arch }}
      run: |
        mkdir -p test-results
        pytest tests/deploy/test_deploy.py \
          --framework="${FRAMEWORK}" \
          --profile="${PROFILE}" \
          --image="${IMAGE}" \
          --namespace="${NAMESPACE}" \
          -v -s \
          --durations=10 \
          --junitxml="test-results/pytest_deploy_${FRAMEWORK}_${PROFILE}_${PLATFORM_ARCH}_${{ github.run_id }}_${{ job.check_run_id }}.xml" \
          --log-cli-level=INFO

    # Dumps diagnostics and deletes the DGD if the test left it behind.
    - name: Cleanup Deployment
      if: always()
      shell: bash
      env:
        KUBECONFIG: ${{ github.workspace }}/.kubeconfig
        NAMESPACE: ${{ steps.ns-check.outputs.ns }}
        # NOTE(review): no script line in this action writes `graph_name` to
        # $GITHUB_OUTPUT; presumably the pytest run does. Confirm, otherwise
        # this is always empty and the targeted delete below never runs.
        GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }}
      run: |
        echo "::group::Cleanup Deployment"
        set -x

        echo "=== PRE-CLEANUP STATUS ==="
        kubectl get dynamographdeployments -n "$NAMESPACE" || true
        kubectl get pods -n "$NAMESPACE" || true

        # Describe every DGD whose second column reads "False".
        # Best effort: failures here must not fail the cleanup step.
        kubectl get dynamographdeployments -n "$NAMESPACE" --no-headers 2>/dev/null \
          | awk '$2 == "False" {print $1}' \
          | while read -r dep_name; do
              echo ">>> DETAILED DESCRIPTION FOR FAILED DEPLOYMENT: $dep_name"
              kubectl describe dynamographdeployments "$dep_name" -n "$NAMESPACE"
            done || true

        if [ -z "${GRAPH_NAME}" ]; then
          # Without a name there is nothing specific to delete; say so rather
          # than claiming the test already cleaned up.
          echo "No DGD name reported by the deploy step, skipping targeted deletion"
        elif kubectl get dynamographdeployments "${GRAPH_NAME}" -n "$NAMESPACE" &>/dev/null; then
          echo "DGD ${GRAPH_NAME} still exists after test, deleting..."
          kubectl delete dynamographdeployments "${GRAPH_NAME}" -n "$NAMESPACE" --timeout=60s
        else
          echo "DGD ${GRAPH_NAME} already cleaned up by test"
        fi
        echo "::endgroup::"

    # Mirrors the self-bootstrap condition: only namespaces created by this
    # action are torn down.
    - name: Teardown namespace (self-cleanup on rerun)
      if: always() && steps.ns-check.outputs.exists != 'true'
      uses: ./.github/actions/teardown-deploy-namespace
      with:
        kubeconfig_base64: ${{ inputs.kubeconfig_base64 }}
        namespace: ${{ steps.ns-check.outputs.ns }}

    - name: Upload Test Results
      uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6
      if: always()
      with:
        name: test-results-${{ inputs.framework }}-${{ inputs.profile }}-${{ inputs.platform_arch }}-${{ github.run_id }}-${{ job.check_run_id }}
        path: test-results/pytest_deploy_${{ inputs.framework }}_${{ inputs.profile }}_${{ inputs.platform_arch }}_${{ github.run_id }}_${{ job.check_run_id }}.xml
        retention-days: 7