# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Kubernetes Job that runs `python3 -m benchmarks.utils.benchmark` once in the
# `benchmark-runner` container, pointing it at `vllm-agg-frontend:8000` and
# passing `/data/results` (on the `dynamo-pvc` claim) as the output directory.
---
apiVersion: batch/v1
kind: Job
metadata:
  name: dynamo-benchmark
spec:
  template:
    spec:
      imagePullSecrets:
        - name: docker-imagepullsecret
      # Pod-wide defaults: run as non-root UID 1000, with fsGroup 1000 applied
      # to mounted volumes.
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        fsGroup: 1000
      containers:
        - name: benchmark-runner
          # TODO: update to latest public image in next release
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          # Container-level hardening: no privilege escalation and every Linux
          # capability dropped.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            runAsNonRoot: true
          resources:
            requests:
              cpu: "4"
              memory: "8Gi"
            limits:
              cpu: "8"
              memory: "16Gi"
          env:
            # Token is read from key `HF_TOKEN` of the `hf-token-secret`
            # Secret, so it never appears in this manifest.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: HF_TOKEN
          command: ["python3", "-m", "benchmarks.utils.benchmark"]
          # Numeric values are quoted because container args must be strings.
          # NOTE(review): --isl / --std / --osl presumably mean input sequence
          # length, its standard deviation, and output sequence length --
          # confirm against the benchmark module's CLI help.
          args:
            - --model
            - "Qwen/Qwen3-0.6B"
            - --isl
            - "2000"
            - --std
            - "10"
            - --osl
            - "256"
            - --output-dir
            - /data/results
            - --benchmark-name
            - "qwen3-0p6b-vllm-agg"
            - --endpoint-url
            - "vllm-agg-frontend:8000"
          volumeMounts:
            # /data/results (the --output-dir above) lives on this mount.
            - name: data-volume
              mountPath: /data
      # Never restart the container in place; with backoffLimit 0 below, a
      # failed run is not retried.
      restartPolicy: Never
      volumes:
        - name: data-volume
          persistentVolumeClaim:
            claimName: dynamo-pvc
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600  # Clean up job after 1 hour