# File: profile_sla_job.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Kubernetes Job that runs the `benchmarks.profiler.profile_sla` Python module
# in a single container and writes its results to a persistent volume.
#
# The ${...} placeholders (NAMESPACE, DOCKER_IMAGE, DGD_CONFIG_FILE) are not
# valid Kubernetes values on their own.
# NOTE(review): presumably they are substituted by a templating step such as
# `envsubst` before the manifest is applied — confirm against the deploy docs.
# They are quoted so that an empty or all-digit substitution still yields a
# YAML string instead of null / an integer.
apiVersion: batch/v1
kind: Job
metadata:
  name: profile-sla
  namespace: "${NAMESPACE}"
spec:
  template:
    spec:
      serviceAccountName: dynamo-sa
      containers:
        - name: profile-sla
          image: "${DOCKER_IMAGE}"
          resources:
            # Only requests are declared; no limits are set in this manifest.
            requests:
              cpu: "16"
              memory: "10Gi"
          env:
            # Token is read from key HF_TOKEN of the `hf-token-secret` Secret,
            # which must already exist in the target namespace.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: HF_TOKEN
            # NATS and etcd addresses are derived from the namespace name.
            - name: NATS_SERVER
              value: "nats://${NAMESPACE}-nats:4222"
            - name: ETCD_ENDPOINTS
              value: "${NAMESPACE}-etcd:2379"
          command: ["python", "-m", "benchmarks.profiler.profile_sla"]
          # Each flag and its value are separate list items (exec-style argv).
          # Numeric values are quoted because container args must be strings.
          args:
            - --config
            - "${DGD_CONFIG_FILE}"
            # Results land under the /data mount of `output-volume` below.
            - --output-dir
            - /data/profiling_results
            - --namespace
            - "${NAMESPACE}"
            - --backend
            - vllm
            - --min-num-gpus-per-engine
            - "1"
            - --max-num-gpus-per-engine
            - "8"
            # NOTE(review): --isl/--osl presumably are input/output sequence
            # lengths and --ttft/--itl latency targets — confirm units and
            # meaning against the profiler's CLI help.
            - --isl
            - "3000"
            - --osl
            - "150"
            - --ttft
            - "200"
            - --itl
            - "20"
          volumeMounts:
            - name: output-volume
              mountPath: /data
      # Never restart the container in place; together with backoffLimit: 0
      # below, a failed run is not retried.
      restartPolicy: Never
      volumes:
        # Backed by the pre-existing PersistentVolumeClaim `dynamo-pvc`.
        - name: output-volume
          persistentVolumeClaim:
            claimName: dynamo-pvc
  backoffLimit: 0