Unverified Commit 0e09ee86 authored by Tushar Sharma's avatar Tushar Sharma Committed by GitHub
Browse files

ci: enable rerunning single deploy test workflows (#6632)


Co-authored-by: default avatarClaude Opus 4.6 <noreply@anthropic.com>
parent 817f2eeb
...@@ -9,6 +9,17 @@ inputs: ...@@ -9,6 +9,17 @@ inputs:
namespace: namespace:
description: 'Kubernetes namespace for deployment' description: 'Kubernetes namespace for deployment'
required: true required: true
registry:
description: 'Container registry hostname. Required for rerun self-bootstrap.'
required: true
operator_tag:
description: 'Operator image tag (default: main-operator). Required for rerun self-bootstrap.'
required: false
default: 'main-operator'
hf_token:
description: 'HuggingFace token for model access'
required: false
default: ''
framework: framework:
description: 'Framework name (vllm, sglang, trtllm)' description: 'Framework name (vllm, sglang, trtllm)'
...@@ -27,16 +38,61 @@ inputs: ...@@ -27,16 +38,61 @@ inputs:
runs: runs:
using: "composite" using: "composite"
steps: steps:
# Decide which namespace this test runs in.
#   - If the namespace passed by the caller exists, reuse it (exists=true).
#   - Otherwise derive a per-framework/profile namespace that the following
#     step self-bootstraps (exists=false).
# Outputs:
#   exists - 'true' when the caller's namespace was found, 'false' otherwise
#   ns     - the namespace every later step must use
- name: Check if namespace exists
  id: ns-check
  shell: bash
  env:
    KUBECONFIG_B64: ${{ inputs.kubeconfig_base64 }}
    NAMESPACE: ${{ inputs.namespace }}
    FRAMEWORK: ${{ inputs.framework }}
    PROFILE: ${{ inputs.profile }}
    CHECK_KUBECONFIG: ${{ github.workspace }}/.kubeconfig_check
  run: |
    echo "::group::Check if namespace exists"
    if [ -z "${NAMESPACE}" ]; then
      echo "::error::inputs.namespace is empty; cannot determine a test namespace"
      exit 1
    fi

    # Remove the throwaway kubeconfig on every exit path, including failures.
    trap 'rm -f "${CHECK_KUBECONFIG}"' EXIT
    echo "${KUBECONFIG_B64}" | base64 -d > "${CHECK_KUBECONFIG}"
    chmod 600 "${CHECK_KUBECONFIG}"

    # `-o name` prints "namespace/<name>" when the namespace is found and nothing
    # when it is absent, so this is an exact lookup (no substring/regex matching)
    # and a kubectl/API failure is reported instead of being read as "absent".
    if ! FOUND="$(KUBECONFIG="${CHECK_KUBECONFIG}" kubectl get namespace "${NAMESPACE}" --ignore-not-found -o name)"; then
      echo "::error::Unable to query namespace ${NAMESPACE}"
      exit 1
    fi

    if [ -n "${FOUND}" ]; then
      echo "exists=true" >> "$GITHUB_OUTPUT"
      echo "ns=${NAMESPACE}" >> "$GITHUB_OUTPUT"
      echo "Namespace $NAMESPACE exists, will reuse it"
    else
      echo "exists=false" >> "$GITHUB_OUTPUT"
      # Generate a unique namespace for this framework+profile to avoid collisions on parallel reruns
      # Replace underscores with hyphens for k8s naming compliance
      PROFILE_SANITIZED="${PROFILE//_/-}"
      SELF_NS="${NAMESPACE}-${FRAMEWORK}-${PROFILE_SANITIZED}"
      # TODO: Improve this length limit. The operator creates k8s labels as
      # "{namespace}-{deployment_name}", which restricts max length to 44 chars
      # (largest deployment name is "vllm-disagg-router" (18 chars)).
      MAX_NS_LEN=44
      if [ "${#SELF_NS}" -gt "${MAX_NS_LEN}" ]; then
        # Plain truncation can map two different framework/profile pairs to the
        # same name. Keep a 6-char hash of the full name as the suffix so the
        # shortened name stays distinct (37-char prefix + "-" + 6 = 44).
        NS_HASH="$(printf '%s' "${SELF_NS}" | sha256sum | cut -c1-6)"
        NS_PREFIX="${SELF_NS:0:$((MAX_NS_LEN - 7))}"
        # Remove trailing dash from truncation
        NS_PREFIX="${NS_PREFIX%-}"
        SELF_NS="${NS_PREFIX}-${NS_HASH}"
      fi
      echo "ns=${SELF_NS}" >> "$GITHUB_OUTPUT"
      echo "Namespace $NAMESPACE not found, will self-bootstrap as ${SELF_NS}"
    fi
    echo "::endgroup::"
# Self-bootstrap path: runs only when ns-check did not find the caller's
# namespace. Creates the namespace chosen by ns-check and installs the
# Dynamo platform operator into it via the shared setup action.
- name: Setup namespace (self-bootstrap on rerun)
  uses: ./.github/actions/setup-deploy-namespace
  if: ${{ steps.ns-check.outputs.exists != 'true' }}
  with:
    # Target: the derived namespace, not the caller's original one.
    namespace: ${{ steps.ns-check.outputs.ns }}
    kubeconfig_base64: ${{ inputs.kubeconfig_base64 }}
    # Operator image coordinates and model-access token, forwarded unchanged.
    registry: ${{ inputs.registry }}
    operator_tag: ${{ inputs.operator_tag }}
    hf_token: ${{ inputs.hf_token }}
- name: Setup Kubeconfig - name: Setup Kubeconfig
id: setup-kubeconfig id: setup-kubeconfig
shell: bash shell: bash
env:
NAMESPACE: ${{ steps.ns-check.outputs.ns }}
run: | run: |
echo "${{ inputs.kubeconfig_base64 }}" | base64 -d > ${{ github.workspace }}/.kubeconfig echo "${{ inputs.kubeconfig_base64 }}" | base64 -d > ${{ github.workspace }}/.kubeconfig
chmod 600 ${{ github.workspace }}/.kubeconfig chmod 600 ${{ github.workspace }}/.kubeconfig
echo "KUBECONFIG=${{ github.workspace }}/.kubeconfig" >> $GITHUB_ENV echo "KUBECONFIG=${{ github.workspace }}/.kubeconfig" >> $GITHUB_ENV
export KUBECONFIG=${{ github.workspace }}/.kubeconfig export KUBECONFIG=${{ github.workspace }}/.kubeconfig
kubectl config set-context --current --namespace=${{ inputs.namespace }} kubectl config set-context --current --namespace=${NAMESPACE}
kubectl config get-contexts kubectl config get-contexts
- name: Set up Python - name: Set up Python
...@@ -56,7 +112,7 @@ runs: ...@@ -56,7 +112,7 @@ runs:
shell: bash shell: bash
env: env:
KUBECONFIG: ${{ github.workspace }}/.kubeconfig KUBECONFIG: ${{ github.workspace }}/.kubeconfig
NAMESPACE: ${{ inputs.namespace }} NAMESPACE: ${{ steps.ns-check.outputs.ns }}
FRAMEWORK: ${{ inputs.framework }} FRAMEWORK: ${{ inputs.framework }}
PROFILE: ${{ inputs.profile }} PROFILE: ${{ inputs.profile }}
IMAGE: ${{ inputs.image }} IMAGE: ${{ inputs.image }}
...@@ -73,12 +129,13 @@ runs: ...@@ -73,12 +129,13 @@ runs:
--log-cli-level=INFO --log-cli-level=INFO
- name: Cleanup Deployment - name: Cleanup Deployment
if: always() && inputs.skip_cleanup != 'true' if: always()
shell: bash shell: bash
env: env:
NAMESPACE: ${{ inputs.namespace }} NAMESPACE: ${{ steps.ns-check.outputs.ns }}
GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }} GRAPH_NAME: ${{ steps.deploy.outputs.graph_name }}
run: | run: |
echo "::group::Cleanup Deployment"
set -x set -x
export KUBECONFIG=${{ github.workspace }}/.kubeconfig export KUBECONFIG=${{ github.workspace }}/.kubeconfig
...@@ -93,8 +150,20 @@ runs: ...@@ -93,8 +150,20 @@ runs:
kubectl describe dynamographdeployments "$dep_name" -n $NAMESPACE kubectl describe dynamographdeployments "$dep_name" -n $NAMESPACE
done || true done || true
echo "Deleting DynamoGraphDeployment: ${GRAPH_NAME}" if kubectl get dynamographdeployments "${GRAPH_NAME}" -n $NAMESPACE &>/dev/null; then
kubectl delete dynamographdeployments ${GRAPH_NAME} -n $NAMESPACE --timeout=60s || true echo "DGD ${GRAPH_NAME} still exists after test, deleting..."
kubectl delete dynamographdeployments ${GRAPH_NAME} -n $NAMESPACE --timeout=60s
else
echo "DGD ${GRAPH_NAME} already cleaned up by test"
fi
echo "::endgroup::"
- name: Teardown namespace (self-cleanup on rerun)
if: always() && steps.ns-check.outputs.exists != 'true'
uses: ./.github/actions/teardown-deploy-namespace
with:
kubeconfig_base64: ${{ inputs.kubeconfig_base64 }}
namespace: ${{ steps.ns-check.outputs.ns }}
- name: Upload Test Results - name: Upload Test Results
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f #v6 uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f #v6
......
# Composite action: prepare an isolated namespace for deploy tests.
#
# Steps, in order:
#   1. Write the kubeconfig to the workspace and export KUBECONFIG for later steps.
#   2. Create (or reuse) the namespace and attach the cleanup/secret labels.
#   3. Optionally create the `hf-token-secret` secret when `hf_token` is set.
#   4. Install the `dynamo-platform` Helm chart restricted to that namespace.
#   5. Wait for the deployments in the namespace to roll out.
#   6. On failure, append a list of unhealthy pods to the job summary.
#
# The action expects the repository to be checked out, because the Helm chart
# is read from deploy/helm/charts/platform/.
name: 'Setup Deploy Namespace'
description: 'Create a Kubernetes namespace and install the Dynamo platform operator via Helm'

inputs:
  kubeconfig_base64:
    description: 'Base64-encoded kubeconfig for cluster access'
    required: true
  namespace:
    description: 'Target namespace name'
    required: true
  registry:
    description: 'Container registry hostname (e.g. myregistry.azurecr.io)'
    required: true
  operator_tag:
    description: 'Operator image tag (default: main-operator)'
    required: false
    default: 'main-operator'
  hf_token:
    description: 'HuggingFace token for model access'
    required: false
    default: ''

runs:
  using: "composite"
  steps:
    - name: Setup Kubeconfig
      shell: bash
      env:
        # Pass the secret through the environment instead of interpolating it
        # into the script text.
        KUBECONFIG_B64: ${{ inputs.kubeconfig_base64 }}
        KUBECONFIG_PATH: ${{ github.workspace }}/.kubeconfig
      run: |
        echo "${KUBECONFIG_B64}" | base64 -d > "${KUBECONFIG_PATH}"
        chmod 600 "${KUBECONFIG_PATH}"
        # Make the kubeconfig visible to every subsequent step of the job.
        echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "$GITHUB_ENV"

    - name: Create namespace
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::Create namespace $NAMESPACE"
        set -x
        # Idempotent create: a leftover namespace from an earlier attempt must
        # not fail the bootstrap.
        kubectl create namespace "${NAMESPACE}" --dry-run=client -o yaml | kubectl apply -f -
        echo "Attaching the labels for secrets and cleanup"
        kubectl label namespaces "${NAMESPACE}" \
          nscleanup/enabled=true \
          nscleanup/ttl=7200 \
          gitlab-imagepull=enabled \
          ngc-api=enabled \
          nvcr-imagepull=enabled \
          --overwrite=true
        # Set the context to the new namespace
        kubectl config set-context --current --namespace="${NAMESPACE}"
        # Check if Istio is installed
        kubectl get pods -n istio-system
        # Check if default storage class exists
        kubectl get storageclass
        echo "::endgroup::"

    - name: Create HF token secret
      if: inputs.hf_token != ''
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
        HF_TOKEN: ${{ inputs.hf_token }}
      run: |
        echo "::group::Create HF token secret"
        # No `set -x` here so the token is never echoed by shell tracing.
        # create --dry-run | apply tolerates an existing secret while still
        # surfacing real failures (previously hidden by `|| true`).
        kubectl create secret generic hf-token-secret \
          --from-literal=HF_TOKEN="${HF_TOKEN}" \
          -n "${NAMESPACE}" \
          --dry-run=client -o yaml | kubectl apply -f -
        echo "::endgroup::"

    - name: Install Dynamo platform via Helm
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
        REGISTRY: ${{ inputs.registry }}
        OPERATOR_TAG: ${{ inputs.operator_tag }}
      run: |
        echo "::group::Install Dynamo platform via Helm"
        set -x
        # Install Helm chart
        export VIRTUAL_ENV=/opt/dynamo/venv
        export KUBE_NS="${NAMESPACE}"
        export ISTIO_ENABLED=true
        export ISTIO_GATEWAY=istio-system/ingress-alb
        export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true
        OPERATOR_REPO="${REGISTRY}/ai-dynamo/dynamo"
        echo "Using operator image: ${OPERATOR_REPO}:${OPERATOR_TAG}"
        # Install helm dependencies
        helm repo add bitnami https://charts.bitnami.com/bitnami
        cd deploy/helm/charts/platform/
        helm dep build .
        # Install platform with namespace restriction for single profile testing
        # we manage crds via Velonix so we skip the crds installation
        # Each --set argument is quoted: "[0]" is a shell glob pattern when unquoted.
        helm upgrade --install dynamo-platform . --namespace "${NAMESPACE}" \
          --skip-crds \
          --set "dynamo-operator.namespaceRestriction.enabled=true" \
          --set "dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE}" \
          --set "dynamo-operator.controllerManager.manager.image.repository=${OPERATOR_REPO}" \
          --set "dynamo-operator.controllerManager.manager.image.tag=${OPERATOR_TAG}" \
          --set "dynamo-operator.gpuDiscovery.enabled=false" \
          --set "dynamo-operator.upgradeCRD=false" \
          --debug
        echo "::endgroup::"

    - name: Wait for operator rollout
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::Wait for operator rollout"
        kubectl rollout status deployment -n "${NAMESPACE}" --watch --timeout=300s
        echo "::endgroup::"

    - name: Report Unhealthy Pods
      if: failure()
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "### OPERATOR DEPLOYMENT FAILED: Unhealthy Pods Report" >> "$GITHUB_STEP_SUMMARY"
        echo "Unhealthy pods:" >> "$GITHUB_STEP_SUMMARY"
        echo "" >> "$GITHUB_STEP_SUMMARY"
        # List pods whose status column is neither Running nor Completed.
        # `|| true`: an empty namespace or an all-healthy one makes grep exit
        # non-zero, which must not mask the original failure.
        kubectl get pods -n "${NAMESPACE}" --no-headers \
          | grep -v -E '(Running|Completed)' \
          | awk '{print "- **" $1 "** | Status: `" $3 "`"}' >> "$GITHUB_STEP_SUMMARY" || true
# Composite action: remove everything the deploy tests created in a namespace.
#
# Steps, in order:
#   1. Write the kubeconfig to the workspace and export KUBECONFIG.
#   2. List the namespace's resources for debugging (never fails the action).
#   3. Delete all DynamoGraphDeployments in the namespace.
#   4. Uninstall the `dynamo-platform` Helm release if it is present.
#   5. Delete the namespace itself.
#
# Steps 4 and 5 run with `if: always()` so that a failure or timeout in an
# earlier cleanup step is still reported but cannot leave the namespace behind.
name: 'Teardown Deploy Namespace'
description: 'Clean up Dynamo platform resources and delete the Kubernetes namespace'

inputs:
  kubeconfig_base64:
    description: 'Base64-encoded kubeconfig for cluster access'
    required: true
  namespace:
    description: 'Namespace to tear down'
    required: true

runs:
  using: "composite"
  steps:
    - name: Setup Kubeconfig
      shell: bash
      env:
        # Pass the secret through the environment instead of interpolating it
        # into the script text.
        KUBECONFIG_B64: ${{ inputs.kubeconfig_base64 }}
        KUBECONFIG_PATH: ${{ github.workspace }}/.kubeconfig
      run: |
        echo "${KUBECONFIG_B64}" | base64 -d > "${KUBECONFIG_PATH}"
        chmod 600 "${KUBECONFIG_PATH}"
        echo "KUBECONFIG=${KUBECONFIG_PATH}" >> "$GITHUB_ENV"

    - name: Debug - List resources
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::List resources in namespace $NAMESPACE"
        # Purely informational; listing errors are ignored.
        kubectl get dynamographdeployments -n "${NAMESPACE}" || true
        kubectl get all -n "${NAMESPACE}" || true
        echo "::endgroup::"

    - name: Delete stale DynamoGraphDeployments
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::Delete stale DynamoGraphDeployments"
        kubectl delete dynamographdeployments --all -n "${NAMESPACE}" --timeout=60s
        echo "::endgroup::"

    - name: Uninstall Helm chart
      # Run even if the DynamoGraphDeployment deletion above failed or timed out.
      if: always()
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::Uninstall Helm chart"
        if helm status dynamo-platform --namespace "${NAMESPACE}" &>/dev/null; then
          helm uninstall dynamo-platform --namespace "${NAMESPACE}" --timeout 10m
        else
          echo "Helm release dynamo-platform not found, skipping"
        fi
        echo "::endgroup::"

    - name: Delete namespace
      # Always attempt the final delete so earlier failures do not leak the namespace.
      if: always()
      shell: bash
      env:
        NAMESPACE: ${{ inputs.namespace }}
      run: |
        echo "::group::Delete namespace $NAMESPACE"
        # --ignore-not-found keeps teardown idempotent when the namespace was
        # never created or has already been removed.
        kubectl delete namespace "${NAMESPACE}" --ignore-not-found --timeout=120s
        echo "::endgroup::"
...@@ -276,103 +276,41 @@ jobs: ...@@ -276,103 +276,41 @@ jobs:
(needs.operator.result == 'success' || needs.operator.result == 'skipped') (needs.operator.result == 'success' || needs.operator.result == 'skipped')
needs: [changed-files, operator] needs: [changed-files, operator]
outputs: outputs:
NAMESPACE: ${{ steps.deploy-operator-step.outputs.namespace }} NAMESPACE: ${{ steps.namespace.outputs.namespace }}
OPERATOR_TAG: ${{ steps.operator-tag.outputs.tag }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- name: Determine operator image tag - name: Determine operator tag
id: operator-tag id: operator-tag
run: | run: |
if [ "${{ needs.operator.result }}" == "success" ]; then if [ "${{ needs.operator.result }}" == "success" ]; then
echo "tag=${{ needs.operator.outputs.operator_default_tag }}" >> $GITHUB_OUTPUT TAG="${{ needs.operator.outputs.operator_default_tag }}"
echo "Using newly built operator image: ${{ needs.operator.outputs.operator_default_tag }}"
else else
echo "tag=main-operator" >> $GITHUB_OUTPUT TAG="main-operator"
echo "Using stable operator image: main-operator"
fi fi
- name: Deploy Operator echo "tag=${TAG}" >> $GITHUB_OUTPUT
id: deploy-operator-step echo "Using operator tag: ${TAG}"
- name: Generate namespace name
id: namespace
env: env:
BRANCH: ${{ github.ref_name }} BRANCH: ${{ github.ref_name }}
run: | run: |
set -x # Sanitize branch name for k8s namespace
# Set namespace
# Invalid patterns: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ # Invalid patterns: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
BRANCH_SANITIZED="${BRANCH//\//-}" BRANCH_SANITIZED="${BRANCH//\//-}"
BRANCH_SANITIZED="${BRANCH_SANITIZED/pull-request/pr}" BRANCH_SANITIZED="${BRANCH_SANITIZED/pull-request/pr}"
BRANCH_SANITIZED="${BRANCH_SANITIZED//./-}" BRANCH_SANITIZED="${BRANCH_SANITIZED//./-}"
# Cap at 10 chars
BRANCH_SANITIZED="${BRANCH_SANITIZED:0:10}" BRANCH_SANITIZED="${BRANCH_SANITIZED:0:10}"
NAMESPACE="gh-id-${{ github.run_id }}-${BRANCH_SANITIZED}-dt" NAMESPACE="gh-id-${{ github.run_id }}-${BRANCH_SANITIZED}-dt"
echo "namespace=${NAMESPACE}" >> "$GITHUB_OUTPUT" echo "namespace=${NAMESPACE}" >> "$GITHUB_OUTPUT"
- name: Setup namespace and operator
# Setup kubeconfig uses: ./.github/actions/setup-deploy-namespace
echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig with:
chmod 600 .kubeconfig kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
export KUBECONFIG=$(pwd)/.kubeconfig namespace: ${{ steps.namespace.outputs.namespace }}
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}" registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
kubectl config current-context operator_tag: ${{ steps.operator-tag.outputs.tag }}
hf_token: ${{ secrets.HF_TOKEN }}
# Create a namespace for this job
echo "Creating an ephemeral namespace..."
kubectl create namespace $NAMESPACE
echo "Attaching the labels for secrets and cleanup"
kubectl label namespaces ${NAMESPACE} nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true
# Set the namespace as default
kubectl config set-context --current --namespace=$NAMESPACE
# Check if Istio is installed
kubectl get pods -n istio-system
# Check if default storage class exists
kubectl get storageclass
# Install Helm chart
export VIRTUAL_ENV=/opt/dynamo/venv
export KUBE_NS=$NAMESPACE
export ISTIO_ENABLED=true
export ISTIO_GATEWAY=istio-system/ingress-alb
export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true
# Install dynamo env secrets
kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=${{ secrets.HF_TOKEN }} -n $KUBE_NS || true
# Install helm dependencies
helm repo add bitnami https://charts.bitnami.com/bitnami
cd deploy/helm/charts/platform/
helm dep build .
# Install platform with namespace restriction for single profile testing
# we manage crds via Velonix so we skip the crds installation
helm upgrade --install dynamo-platform . --namespace ${NAMESPACE} \
--skip-crds \
--set dynamo-operator.namespaceRestriction.enabled=true \
--set dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE} \
--set dynamo-operator.controllerManager.manager.image.repository=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo \
--set dynamo-operator.controllerManager.manager.image.tag=${{ steps.operator-tag.outputs.tag }} \
--set dynamo-operator.gpuDiscovery.enabled=false \
--set dynamo-operator.upgradeCRD=false \
--debug
# Wait for all deployments to be ready
timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch
- name: 🔍 Report Unhealthy Pods
if: failure()
run: |
# Setup kubeconfig
echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
chmod 600 .kubeconfig
export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
kubectl config current-context
# Descriptive header for the summary
echo "### ⚠️ OPERATOR DEPLOYMENT FAILED: Unhealthy Pods Report" >> $GITHUB_STEP_SUMMARY
echo "Unhealthy pods:" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
# Get pods, exclude healthy ones, and format output
# If the namespace is empty or all pods are healthy, the grep/awk won't output anything, which is handled gracefully.
kubectl get pods -n ${{ steps.deploy-operator-step.outputs.namespace }} --no-headers \
| grep -v -E '(Running|Completed)' \
| awk '{print "- 🔴 **" $1 "** | Status: `" $3 "`"}' >> $GITHUB_STEP_SUMMARY || true
# ============================================================================ # ============================================================================
# #
# End-to-end tests for each framework with various deployment profiles # End-to-end tests for each framework with various deployment profiles
...@@ -406,7 +344,9 @@ jobs: ...@@ -406,7 +344,9 @@ jobs:
with: with:
kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }} kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }} namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
deployment_file: "deploy/${{ matrix.profile }}.yaml" registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
operator_tag: ${{ needs.deploy-operator.outputs.OPERATOR_TAG }}
hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-vllm-cuda12-amd64
...@@ -439,7 +379,9 @@ jobs: ...@@ -439,7 +379,9 @@ jobs:
with: with:
kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }} kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }} namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
deployment_file: "deploy/${{ matrix.profile }}.yaml" registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
operator_tag: ${{ needs.deploy-operator.outputs.OPERATOR_TAG }}
hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-cuda12-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-sglang-cuda12-amd64
...@@ -476,7 +418,9 @@ jobs: ...@@ -476,7 +418,9 @@ jobs:
with: with:
kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }} kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }} namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
deployment_file: "deploy/${{ matrix.profile }}.yaml" registry: ${{ secrets.AZURE_ACR_HOSTNAME }}
operator_tag: ${{ needs.deploy-operator.outputs.OPERATOR_TAG }}
hf_token: ${{ secrets.HF_TOKEN }}
framework: ${{ env.FRAMEWORK }} framework: ${{ env.FRAMEWORK }}
profile: ${{ matrix.profile }} profile: ${{ matrix.profile }}
image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-cuda13-amd64 image: ${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-trtllm-cuda13-amd64
...@@ -511,47 +455,14 @@ jobs: ...@@ -511,47 +455,14 @@ jobs:
cleanup: cleanup:
name: Cleanup AKS resources name: Cleanup AKS resources
runs-on: prod-default-small-v2 runs-on: prod-default-small-v2
if: always() if: always()
needs: [deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm] needs: [deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm]
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Setup Kubeconfig - name: Teardown namespace
env: if: needs.deploy-operator.outputs.NAMESPACE != ''
NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} uses: ./.github/actions/teardown-deploy-namespace
run: | with:
set -x kubeconfig_base64: ${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}
# Setup kubeconfig namespace: ${{ needs.deploy-operator.outputs.NAMESPACE }}
echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
chmod 600 .kubeconfig
export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
kubectl config current-context
- name: Cleanup
timeout-minutes: 5
env:
NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
run: |
set -x
export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE
echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
chmod 600 .kubeconfig
export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
# For debugging purposes, list all the resources before we uninstall
kubectl get dynamographdeployments
kubectl get all
echo "Deleting all DynamoGraphDeployments in namespace $NAMESPACE..."
kubectl delete dynamographdeployments --all -n $NAMESPACE || true
# Uninstall the helm chart
helm ls
helm uninstall dynamo-platform --namespace $NAMESPACE --timeout 10m || true
echo "Namespace $NAMESPACE deletion initiated, proceeding with cleanup..."
kubectl delete namespace $NAMESPACE || true
echo "Namespace $NAMESPACE completed."
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment