Unverified commit feb325f5, authored by Pavithra Vijayakrishnan, committed by GitHub
Browse files

ci: Add manual trigger variable (#4490)


Signed-off-by: pvijayakrish <pvijayakrish@nvidia.com>
parent 796520bb
...@@ -9,6 +9,13 @@ on: ...@@ -9,6 +9,13 @@ on:
- main - main
- "pull-request/[0-9]+" - "pull-request/[0-9]+"
- release/*.*.* - release/*.*.*
workflow_dispatch:
inputs:
run_deploy_operator:
description: 'Run deploy operator and deployment tests'
required: false
type: boolean
default: false
concurrency: concurrency:
# The group name is a ternary operation. If the ref_name is 'main', # The group name is a ternary operation. If the ref_name is 'main',
...@@ -21,6 +28,7 @@ concurrency: ...@@ -21,6 +28,7 @@ concurrency:
jobs: jobs:
changed-files: changed-files:
runs-on: ubuntu-latest runs-on: ubuntu-latest
environment: ${{ github.event_name == 'workflow_dispatch' && 'protected-deploy' || '' }}
outputs: outputs:
has_code_changes: ${{ steps.filter.outputs.has_code_changes }} has_code_changes: ${{ steps.filter.outputs.has_code_changes }}
steps: steps:
...@@ -517,325 +525,335 @@ jobs: ...@@ -517,325 +525,335 @@ jobs:
# Upload complete workflow metrics including container metrics # Upload complete workflow metrics including container metrics
python3 .github/workflows/upload_complete_workflow_metrics.py python3 .github/workflows/upload_complete_workflow_metrics.py
# deploy-operator: deploy-operator:
# runs-on: cpu-amd-m5-2xlarge runs-on: cpu-amd-m5-2xlarge
# if: needs.changed-files.outputs.has_code_changes == 'true' # TODO: Uncomment this when we have a way to test the deploy-operator job in CI.
# needs: [changed-files, operator, vllm, sglang, trtllm] #if: needs.changed-files.outputs.has_code_changes == 'true'
# env: if: github.event.inputs.run_deploy_operator
# DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com needs: [changed-files, operator, vllm, sglang, trtllm]
# outputs: env:
# NAMESPACE: ${{ steps.deploy-operator-step.outputs.namespace }} DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com
# steps: outputs:
# - name: Output Node Name NAMESPACE: ${{ steps.deploy-operator-step.outputs.namespace }}
# shell: bash steps:
# run: | - name: Output Node Name
# echo ${K8S_NODE_NAME} shell: bash
# - uses: actions/checkout@v4 run: |
# - name: Deploy Operator echo ${K8S_NODE_NAME}
# id: deploy-operator-step - uses: actions/checkout@v4
# env: - name: Deploy Operator
# BRANCH: ${{ github.ref_name }} id: deploy-operator-step
# run: | env:
# set -x BRANCH: ${{ github.ref_name }}
run: |
# # Set namespace set -x
# # Invalid patterns: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
# BRANCH_SANITIZED="${BRANCH//\//-}" # Set namespace
# BRANCH_SANITIZED="${BRANCH_SANITIZED/pull-request/pr}" # Invalid patterns: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
# BRANCH_SANITIZED="${BRANCH_SANITIZED//./-}" BRANCH_SANITIZED="${BRANCH//\//-}"
# NAMESPACE="gh-id-${{ github.run_id }}-${BRANCH_SANITIZED}-dt" BRANCH_SANITIZED="${BRANCH_SANITIZED/pull-request/pr}"
# echo "namespace=${NAMESPACE}" >> "$GITHUB_OUTPUT" BRANCH_SANITIZED="${BRANCH_SANITIZED//./-}"
NAMESPACE="gh-id-${{ github.run_id }}-${BRANCH_SANITIZED}-dt"
# # Setup kubeconfig echo "namespace=${NAMESPACE}" >> "$GITHUB_OUTPUT"
# echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
# chmod 600 .kubeconfig # Setup kubeconfig
# export KUBECONFIG=$(pwd)/.kubeconfig echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
# kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}" chmod 600 .kubeconfig
# kubectl config current-context export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
# # Create a namespace for this job kubectl config current-context
# echo "Creating an ephemeral namespace..."
# kubectl create namespace $NAMESPACE # Create a namespace for this job
# echo "Attaching the labels for secrets and cleanup" echo "Creating an ephemeral namespace..."
# kubectl label namespaces ${NAMESPACE} nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true kubectl create namespace $NAMESPACE
echo "Attaching the labels for secrets and cleanup"
# # Set the namespace as default kubectl label namespaces ${NAMESPACE} nscleanup/enabled=true nscleanup/ttl=7200 gitlab-imagepull=enabled ngc-api=enabled nvcr-imagepull=enabled --overwrite=true
# kubectl config set-context --current --namespace=$NAMESPACE
# Set the namespace as default
# # Check if Istio is installed kubectl config set-context --current --namespace=$NAMESPACE
# kubectl get pods -n istio-system
# # Check if default storage class exists # Check if Istio is installed
# kubectl get storageclass kubectl get pods -n istio-system
# Check if default storage class exists
# # Install Helm chart kubectl get storageclass
# export VIRTUAL_ENV=/opt/dynamo/venv
# export KUBE_NS=$NAMESPACE # Install Helm chart
# export ISTIO_ENABLED=true export VIRTUAL_ENV=/opt/dynamo/venv
# export ISTIO_GATEWAY=istio-system/ingress-alb export KUBE_NS=$NAMESPACE
# export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true export ISTIO_ENABLED=true
# export DYNAMO_CLOUD=https://${NAMESPACE}.${DYNAMO_INGRESS_SUFFIX} export ISTIO_GATEWAY=istio-system/ingress-alb
export VIRTUAL_SERVICE_SUPPORTS_HTTPS=true
# # Install dynamo env secrets export DYNAMO_CLOUD=https://${NAMESPACE}.${DYNAMO_INGRESS_SUFFIX}
# kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=${{ secrets.HF_TOKEN }} -n $KUBE_NS || true
# # Create docker pull secret for operator image # Install dynamo env secrets
# kubectl create secret docker-registry docker-imagepullsecret --docker-server=${{ secrets.AZURE_ACR_HOSTNAME }} --docker-username=${{ secrets.AZURE_ACR_USER }} --docker-password=${{ secrets.AZURE_ACR_PASSWORD }} --namespace=${NAMESPACE} kubectl create secret generic hf-token-secret --from-literal=HF_TOKEN=${{ secrets.HF_TOKEN }} -n $KUBE_NS || true
# # Install helm dependencies # Create docker pull secret for operator image
# helm repo add bitnami https://charts.bitnami.com/bitnami kubectl create secret docker-registry docker-imagepullsecret --docker-server=${{ secrets.AZURE_ACR_HOSTNAME }} --docker-username=${{ secrets.AZURE_ACR_USER }} --docker-password=${{ secrets.AZURE_ACR_PASSWORD }} --namespace=${NAMESPACE}
# cd deploy/cloud/helm/platform/ # Install helm dependencies
# helm dep build . helm repo add bitnami https://charts.bitnami.com/bitnami
# # Install platform with namespace restriction for single profile testing cd deploy/cloud/helm/platform/
# helm upgrade --install dynamo-platform . --namespace ${NAMESPACE} \ helm dep build .
# --set dynamo-operator.namespaceRestriction.enabled=true \ # Install platform with namespace restriction for single profile testing
# --set dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE} \ helm upgrade --install dynamo-platform . --namespace ${NAMESPACE} \
# --set dynamo-operator.controllerManager.manager.image.repository=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo \ --set dynamo-operator.namespaceRestriction.enabled=true \
# --set dynamo-operator.controllerManager.manager.image.tag=${{ github.sha }}-operator-amd64 \ --set dynamo-operator.namespaceRestriction.allowedNamespaces[0]=${NAMESPACE} \
# --set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret --set dynamo-operator.controllerManager.manager.image.repository=${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo \
# # Wait for all deployments to be ready --set dynamo-operator.controllerManager.manager.image.tag=${{ github.sha }}-operator-amd64 \
# timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch --set dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret
# Wait for all deployments to be ready
# deploy-test-vllm: timeout 300s kubectl rollout status deployment -n $NAMESPACE --watch
# runs-on: cpu-amd-m5-2xlarge
# if: needs.changed-files.outputs.has_code_changes == 'true' deploy-test-vllm:
# needs: [changed-files, deploy-operator, vllm] runs-on: cpu-amd-m5-2xlarge
# permissions: # TODO: Uncomment this when we have a way to test the deploy-test-vllm job in CI.
# contents: read #if: needs.changed-files.outputs.has_code_changes == 'true'
# strategy: if: github.event.inputs.run_deploy_operator
# fail-fast: false needs: [changed-files, deploy-operator, vllm]
# max-parallel: 1 permissions:
# matrix: contents: read
# profile: strategy:
# - agg fail-fast: false
# - agg_router max-parallel: 1
# - disagg matrix:
# - disagg_router profile:
# name: deploy-test-vllm (${{ matrix.profile }}) - agg
# env: - agg_router
# FRAMEWORK: vllm - disagg
# DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com - disagg_router
# DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml" name: deploy-test-vllm (${{ matrix.profile }})
# MODEL_NAME: "Qwen/Qwen3-0.6B" env:
# steps: &deploy-test-steps FRAMEWORK: vllm
# - name: Output Node Name DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com
# shell: bash DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml"
# run: | MODEL_NAME: "Qwen/Qwen3-0.6B"
# echo ${K8S_NODE_NAME} steps: &deploy-test-steps
# - uses: actions/checkout@v4 - name: Output Node Name
# - name: Setup Kubeconfig shell: bash
# env: run: |
# NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} echo ${K8S_NODE_NAME}
# run: | - uses: actions/checkout@v4
# set -x - name: Setup Kubeconfig
# # Setup kubeconfig env:
# echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
# chmod 600 .kubeconfig run: |
# export KUBECONFIG=$(pwd)/.kubeconfig set -x
# kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}" # Setup kubeconfig
# kubectl config get-contexts echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
# - name: Run Tests chmod 600 .kubeconfig
# env: export KUBECONFIG=$(pwd)/.kubeconfig
# NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
# run: | kubectl config get-contexts
# set -x - name: Run Tests
# export KUBECONFIG=$(pwd)/.kubeconfig env:
# kubectl config set-context --current --namespace=$NAMESPACE NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
run: |
# cd examples/backends/$FRAMEWORK set -x
# export FRAMEWORK_RUNTIME_IMAGE="${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${FRAMEWORK}-amd64" export KUBECONFIG=$(pwd)/.kubeconfig
# export KUBE_NS=$NAMESPACE kubectl config set-context --current --namespace=$NAMESPACE
# export GRAPH_NAME=$(yq e '.metadata.name' $DEPLOYMENT_FILE)
# echo "GRAPH_NAME=${GRAPH_NAME}" >> $GITHUB_ENV cd examples/backends/$FRAMEWORK
# # Update the deployment file in-place export FRAMEWORK_RUNTIME_IMAGE="${{ secrets.AZURE_ACR_HOSTNAME }}/ai-dynamo/dynamo:${{ github.sha }}-${FRAMEWORK}-amd64"
# yq -i '.spec.services.[].extraPodSpec.mainContainer.image = env(FRAMEWORK_RUNTIME_IMAGE)' $DEPLOYMENT_FILE export KUBE_NS=$NAMESPACE
export GRAPH_NAME=$(yq e '.metadata.name' $DEPLOYMENT_FILE)
# # Debug: Show updated deployment file echo "GRAPH_NAME=${GRAPH_NAME}" >> $GITHUB_ENV
# echo "=== UPDATED DEPLOYMENT FILE ===" # Update the deployment file in-place
# cat $DEPLOYMENT_FILE yq -i '.spec.services.[].extraPodSpec.mainContainer.image = env(FRAMEWORK_RUNTIME_IMAGE)' $DEPLOYMENT_FILE
# # Apply the updated file # Debug: Show updated deployment file
# kubectl apply -n $KUBE_NS -f $DEPLOYMENT_FILE echo "=== UPDATED DEPLOYMENT FILE ==="
cat $DEPLOYMENT_FILE
# # --- Wait for all pods in the dynamo graph deployment to be ready ---
# sleep 20 # Apply the updated file
# # Get the deployment name from the file kubectl apply -n $KUBE_NS -f $DEPLOYMENT_FILE
# export GRAPH_NAME=$(yq e '.metadata.name' $DEPLOYMENT_FILE)
# echo "Waiting for all pods with label nvidia.com/dynamo-graph-deployment-name: $GRAPH_NAME" # --- Wait for all pods in the dynamo graph deployment to be ready ---
# # Wait for all pods with the deployment label to be ready sleep 20
# kubectl wait --for=condition=ready pod -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" -n ${KUBE_NS} --timeout=1300s # Get the deployment name from the file
export GRAPH_NAME=$(yq e '.metadata.name' $DEPLOYMENT_FILE)
# # Debug: Show final pod statuses for the deployment echo "Waiting for all pods with label nvidia.com/dynamo-graph-deployment-name: $GRAPH_NAME"
# echo "=== FINAL POD STATUSES ===" # Wait for all pods with the deployment label to be ready
# kubectl get pods -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" -n $KUBE_NS -o wide kubectl wait --for=condition=ready pod -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" -n ${KUBE_NS} --timeout=1800s
# echo ""
# Debug: Show final pod statuses for the deployment
# kubectl get all -n $KUBE_NS echo "=== FINAL POD STATUSES ==="
# export FRONTEND_POD=$(kubectl get pods -n ${KUBE_NS} -l nvidia.com/dynamo-component-type=frontend,nvidia.com/dynamo-graph-deployment-name=${GRAPH_NAME} | tail -n1 | awk '{print $1}') kubectl get pods -l "nvidia.com/dynamo-graph-deployment-name=$GRAPH_NAME" -n $KUBE_NS -o wide
# export CONTAINER_PORT=$(kubectl get pod $FRONTEND_POD -n ${KUBE_NS} -o jsonpath='{.spec.containers[0].ports[?(@.name=="http")].containerPort}') echo ""
# echo "Container port is ${CONTAINER_PORT}"
# kubectl port-forward pod/$FRONTEND_POD 8000:${CONTAINER_PORT} -n ${KUBE_NS} & kubectl get all -n $KUBE_NS
# export LLM_URL="http://localhost:8000" export FRONTEND_POD=$(kubectl get pods -n ${KUBE_NS} -l nvidia.com/dynamo-component-type=frontend,nvidia.com/dynamo-graph-deployment-name=${GRAPH_NAME} | tail -n1 | awk '{print $1}')
# sleep 10 # Give port-forward time to establish the connection export CONTAINER_PORT=$(kubectl get pod $FRONTEND_POD -n ${KUBE_NS} -o jsonpath='{.spec.containers[0].ports[?(@.name=="http")].containerPort}')
# echo "LLM URL: ${LLM_URL}" echo "Container port is ${CONTAINER_PORT}"
# echo "MODEL NAME: ${MODEL_NAME}" kubectl port-forward pod/$FRONTEND_POD 8000:${CONTAINER_PORT} -n ${KUBE_NS} &
# # Wait until the model is available in the /v1/models response export LLM_URL="http://localhost:8000"
# MAX_ATTEMPTS=30 sleep 10 # Give port-forward time to establish the connection
# ATTEMPT=1 echo "LLM URL: ${LLM_URL}"
# while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do echo "MODEL NAME: ${MODEL_NAME}"
# MODELS_RESPONSE=$(curl -s --retry 5 --retry-delay 2 --retry-connrefused "${LLM_URL}/v1/models") # Wait until the model is available in the /v1/models response
# if echo "$MODELS_RESPONSE" | jq -e --arg MODEL_NAME "$MODEL_NAME" '.data[]?.id == $MODEL_NAME' >/dev/null 2>&1; then MAX_ATTEMPTS=30
# echo "Model $MODEL_NAME is available in /v1/models" ATTEMPT=1
# break while [ $ATTEMPT -le $MAX_ATTEMPTS ]; do
# fi MODELS_RESPONSE=$(curl -s --retry 5 --retry-delay 2 --retry-connrefused "${LLM_URL}/v1/models")
# echo "Waiting for model $MODEL_NAME to be available in /v1/models... (attempt $ATTEMPT/$MAX_ATTEMPTS)" if echo "$MODELS_RESPONSE" | jq -e --arg MODEL_NAME "$MODEL_NAME" '.data[]?.id == $MODEL_NAME' >/dev/null 2>&1; then
# sleep 5 echo "Model $MODEL_NAME is available in /v1/models"
# ATTEMPT=$((ATTEMPT + 1)) break
# done fi
# if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then echo "Waiting for model $MODEL_NAME to be available in /v1/models... (attempt $ATTEMPT/$MAX_ATTEMPTS)"
# echo "Model $MODEL_NAME not found in /v1/models after $MAX_ATTEMPTS attempts" sleep 5
# echo "Last response: $MODELS_RESPONSE" ATTEMPT=$((ATTEMPT + 1))
# exit 1 done
# fi if [ $ATTEMPT -gt $MAX_ATTEMPTS ]; then
# RESPONSE=$(curl -s -N --no-buffer --retry 10 --retry-delay 5 --retry-connrefused -X POST "${LLM_URL}/v1/chat/completions" \ echo "Model $MODEL_NAME not found in /v1/models after $MAX_ATTEMPTS attempts"
# -H 'accept: text/event-stream' \ echo "Last response: $MODELS_RESPONSE"
# -H 'Content-Type: application/json' \ exit 1
# -d '{ fi
# "model": "'"${MODEL_NAME:-Qwen/Qwen3-0.6B}"'", RESPONSE=$(curl -s -N --no-buffer --retry 10 --retry-delay 5 --retry-connrefused -X POST "${LLM_URL}/v1/chat/completions" \
# "messages": [ -H 'accept: text/event-stream' \
# { -H 'Content-Type: application/json' \
# "role": "user", -d '{
# "content": "In the heart of Eldoria, an ancient land of boundless magic and mysterious creatures, lies the long-forgotten city of Aeloria. Once a beacon of knowledge and power, Aeloria was buried beneath the shifting sands of time, lost to the world for centuries. You are an intrepid explorer, known for your unparalleled curiosity and courage, who has stumbled upon an ancient map hinting at ests that Aeloria holds a secret so profound that it has the potential to reshape the very fabric of reality. Your journey will take you through treacherous deserts, enchanted forests, and across perilous mountain ranges. Your Task: Character Background: Develop a detailed background for your character. Describe their motivations for seeking out Aeloria, their skills and weaknesses, and any personal connections to the ancient city or its legends. Are they driven by a quest for knowledge, a search for lost familt clue is hidden." "model": "'"${MODEL_NAME:-Qwen/Qwen3-0.6B}"'",
# } "messages": [
# ], {
# "stream":false, "role": "user",
# "max_tokens": 30, "content": "In the heart of Eldoria, an ancient land of boundless magic and mysterious creatures, lies the long-forgotten city of Aeloria. Once a beacon of knowledge and power, Aeloria was buried beneath the shifting sands of time, lost to the world for centuries. You are an intrepid explorer, known for your unparalleled curiosity and courage, who has stumbled upon an ancient map hinting at ests that Aeloria holds a secret so profound that it has the potential to reshape the very fabric of reality. Your journey will take you through treacherous deserts, enchanted forests, and across perilous mountain ranges. Your Task: Character Background: Develop a detailed background for your character. Describe their motivations for seeking out Aeloria, their skills and weaknesses, and any personal connections to the ancient city or its legends. Are they driven by a quest for knowledge, a search for lost familt clue is hidden."
# "temperature": 0.0 }
# }' 2>&1) ],
# echo "Response: $RESPONSE" "stream":false,
# TEST_RESULT=0 "max_tokens": 30,
# if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then "temperature": 0.0
# echo "Test failed: Response is not valid JSON" }' 2>&1)
# echo "Got: $RESPONSE" echo "Response: $RESPONSE"
# TEST_RESULT=1 TEST_RESULT=0
# elif ! echo "$RESPONSE" | jq -e '.choices[0].message.role == "assistant"' >/dev/null 2>&1; then if ! echo "$RESPONSE" | jq -e . >/dev/null 2>&1; then
# echo "Test failed: Message role is not 'assistant'" echo "Test failed: Response is not valid JSON"
# echo "Got: $(echo "$RESPONSE" | jq '.choices[0].message.role')" echo "Got: $RESPONSE"
# TEST_RESULT=1 TEST_RESULT=1
# elif ! echo "$RESPONSE" | jq -e '.model == "'"${MODEL_NAME}"'"' >/dev/null 2>&1; then elif ! echo "$RESPONSE" | jq -e '.choices[0].message.role == "assistant"' >/dev/null 2>&1; then
# echo "Test failed: Model name is incorrect" echo "Test failed: Message role is not 'assistant'"
# echo "Got: $(echo "$RESPONSE" | jq '.model')" echo "Got: $(echo "$RESPONSE" | jq '.choices[0].message.role')"
# TEST_RESULT=1 TEST_RESULT=1
# elif ! echo "$RESPONSE" | jq -e '.choices[0].message.content | length > 100' >/dev/null 2>&1; then elif ! echo "$RESPONSE" | jq -e '.model == "'"${MODEL_NAME}"'"' >/dev/null 2>&1; then
# echo "Test failed: Response content length is not greater than 100 characters" echo "Test failed: Model name is incorrect"
# echo "Got length: $(echo "$RESPONSE" | jq '.choices[0].message.content | length')" echo "Got: $(echo "$RESPONSE" | jq '.model')"
# TEST_RESULT=1 TEST_RESULT=1
# else elif ! echo "$RESPONSE" | jq -e '.choices[0].message.content | length > 100' >/dev/null 2>&1; then
# echo "Test passed: Response matches expected format and content" echo "Test failed: Response content length is not greater than 100 characters"
# fi echo "Got length: $(echo "$RESPONSE" | jq '.choices[0].message.content | length')"
# exit $TEST_RESULT TEST_RESULT=1
# - name: Cleanup else
# if: always() echo "Test passed: Response matches expected format and content"
# timeout-minutes: 5 fi
# env: exit $TEST_RESULT
# NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} - name: Cleanup
# run: | if: always()
# set -x timeout-minutes: 5
# export KUBECONFIG=$(pwd)/.kubeconfig env:
# kubectl config set-context --current --namespace=$NAMESPACE NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
run: |
# # For debugging purposes, list all the resources before we delete set -x
# kubectl get all export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE
# echo "Deleting DynamoGraphDeployments for this job in namespace $NAMESPACE..."
# kubectl delete dynamographdeployments ${GRAPH_NAME} -n $NAMESPACE || true # For debugging purposes, list all the resources before we delete
kubectl get all
# deploy-test-sglang:
# runs-on: cpu-amd-m5-2xlarge echo "Deleting DynamoGraphDeployments for this job in namespace $NAMESPACE..."
# if: needs.changed-files.outputs.has_code_changes == 'true' kubectl delete dynamographdeployments ${GRAPH_NAME} -n $NAMESPACE || true
# needs: [changed-files, deploy-operator, sglang]
# permissions: deploy-test-sglang:
# contents: read runs-on: cpu-amd-m5-2xlarge
# strategy: # TODO: Uncomment this when we have a way to test the deploy-test-sglang job in CI.
# fail-fast: false #if: needs.changed-files.outputs.has_code_changes == 'true'
# max-parallel: 1 if: github.event.inputs.run_deploy_operator
# matrix: needs: [changed-files, deploy-operator, sglang]
# profile: permissions:
# - agg contents: read
# - agg_router strategy:
# name: deploy-test-sglang (${{ matrix.profile }}) fail-fast: false
# env: max-parallel: 1
# FRAMEWORK: sglang matrix:
# DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com profile:
# DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml" - agg
# MODEL_NAME: "Qwen/Qwen3-0.6B" - agg_router
# steps: *deploy-test-steps name: deploy-test-sglang (${{ matrix.profile }})
env:
# deploy-test-trtllm: FRAMEWORK: sglang
# runs-on: cpu-amd-m5-2xlarge DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com
# if: needs.changed-files.outputs.has_code_changes == 'true' DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml"
# needs: [changed-files, deploy-operator, trtllm] MODEL_NAME: "Qwen/Qwen3-0.6B"
# permissions: steps: *deploy-test-steps
# contents: read
# strategy: deploy-test-trtllm:
# fail-fast: false runs-on: cpu-amd-m5-2xlarge
# max-parallel: 1 # TODO: Uncomment this when we have a way to test the deploy-test-trtllm job in CI.
# matrix: #if: needs.changed-files.outputs.has_code_changes == 'true'
# profile: if: github.event.inputs.run_deploy_operator
# - agg needs: [changed-files, deploy-operator, trtllm]
# - agg_router permissions:
# - disagg contents: read
# - disagg_router strategy:
# name: deploy-test-trtllm (${{ matrix.profile }}) fail-fast: false
# env: max-parallel: 1
# FRAMEWORK: trtllm matrix:
# DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com profile:
# DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml" - agg
# MODEL_NAME: "Qwen/Qwen3-0.6B" - agg_router
# steps: *deploy-test-steps - disagg
- disagg_router
# cleanup: name: deploy-test-trtllm (${{ matrix.profile }})
# runs-on: cpu-amd-m5-2xlarge env:
# if: always() FRAMEWORK: trtllm
# needs: [changed-files, deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm] DYNAMO_INGRESS_SUFFIX: dev.aire.nvidia.com
# steps: DEPLOYMENT_FILE: "deploy/${{ matrix.profile }}.yaml"
# - name: Output Node Name MODEL_NAME: "Qwen/Qwen3-0.6B"
# shell: bash steps: *deploy-test-steps
# run: |
# echo ${K8S_NODE_NAME} cleanup:
# - uses: actions/checkout@v4 runs-on: cpu-amd-m5-2xlarge
# - name: Setup Kubeconfig # TODO: Uncomment the below if statement when we have a way to test the cleanup job in CI.
# env: # if: always()
# NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} if: github.event.inputs.run_deploy_operator
# run: | needs: [changed-files, deploy-operator, deploy-test-trtllm, deploy-test-sglang, deploy-test-vllm]
# set -x steps:
# # Setup kubeconfig - name: Output Node Name
# echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig shell: bash
# chmod 600 .kubeconfig run: |
# export KUBECONFIG=$(pwd)/.kubeconfig echo ${K8S_NODE_NAME}
# kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}" - uses: actions/checkout@v4
# kubectl config current-context - name: Setup Kubeconfig
# - name: Cleanup env:
# timeout-minutes: 5 NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
# env: run: |
# NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }} set -x
# run: | # Setup kubeconfig
# set -x echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
# export KUBECONFIG=$(pwd)/.kubeconfig chmod 600 .kubeconfig
# kubectl config set-context --current --namespace=$NAMESPACE export KUBECONFIG=$(pwd)/.kubeconfig
kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
# echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig kubectl config current-context
# chmod 600 .kubeconfig - name: Cleanup
# export KUBECONFIG=$(pwd)/.kubeconfig timeout-minutes: 5
# kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}" env:
NAMESPACE: ${{ needs.deploy-operator.outputs.NAMESPACE }}
# # For debugging purposes, list all the resources before we uninstall run: |
# kubectl get all set -x
export KUBECONFIG=$(pwd)/.kubeconfig
# echo "Deleting all DynamoGraphDeployments in namespace $NAMESPACE..." kubectl config set-context --current --namespace=$NAMESPACE
# kubectl delete dynamographdeployments --all -n $NAMESPACE || true
echo "${{ secrets.AZURE_AKS_CI_KUBECONFIG_B64 }}" | base64 -d > .kubeconfig
# # Uninstall the helm chart chmod 600 .kubeconfig
# helm ls export KUBECONFIG=$(pwd)/.kubeconfig
# helm uninstall dynamo-platform --namespace $NAMESPACE || true kubectl config set-context --current --namespace=$NAMESPACE --kubeconfig "${KUBECONFIG}"
# echo "Namespace $NAMESPACE deletion initiated, proceeding with cleanup..." # For debugging purposes, list all the resources before we uninstall
# kubectl delete namespace $NAMESPACE || true kubectl get all
# echo "Namespace $NAMESPACE completed."
echo "Deleting all DynamoGraphDeployments in namespace $NAMESPACE..."
kubectl delete dynamographdeployments --all -n $NAMESPACE || true
# Uninstall the helm chart
helm ls
helm uninstall dynamo-platform --namespace $NAMESPACE || true
echo "Namespace $NAMESPACE deletion initiated, proceeding with cleanup..."
kubectl delete namespace $NAMESPACE || true
echo "Namespace $NAMESPACE completed."
...@@ -138,4 +138,4 @@ jobs: ...@@ -138,4 +138,4 @@ jobs:
--form token=${{ secrets.PIPELINE_TOKEN }} \ --form token=${{ secrets.PIPELINE_TOKEN }} \
--form ref=${REF} \ --form ref=${REF} \
"${ci_args[@]}" \ "${ci_args[@]}" \
"${{ secrets.PIPELINE_URL }}" "${{ secrets.PIPELINE_URL }}"
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment