# File: profile_sla_job.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# TODO: Update this manifest to the DGDR spec for online mode.
---
# Kubernetes Job that runs the SLA profiler module
# (benchmarks.profiler.profile_sla) and writes its output to
# /data/profiling_results on the mounted persistent volume claim.
#
# This file is a template: ${NAMESPACE}, ${DOCKER_IMAGE} and
# ${DGD_CONFIG_FILE} must be substituted before the manifest is applied
# (presumably with envsubst or an equivalent tool; confirm against the
# deployment instructions). The placeholders are quoted so that an empty or
# number-/boolean-looking substitution is still parsed as a string.
apiVersion: batch/v1
kind: Job
metadata:
  name: profile-sla
  namespace: "${NAMESPACE}"
spec:
  template:
    spec:
      serviceAccountName: dynamo-sa
      containers:
        - name: profile-sla
          image: "${DOCKER_IMAGE}"
          resources:
            # Only requests are set; no limits are declared for this container.
            requests:
              cpu: "16"
              memory: "10Gi"
          env:
            # Token is read from key HF_TOKEN of the hf-token-secret Secret,
            # which must already exist in the target namespace.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: HF_TOKEN
            # NATS and etcd endpoints are derived from the namespace name.
            - name: NATS_SERVER
              value: "nats://${NAMESPACE}-nats:4222"
            - name: ETCD_ENDPOINTS
              value: "${NAMESPACE}-etcd:2379"
          command: ["python", "-m", "benchmarks.profiler.profile_sla"]
          # Container args must be strings, so numeric flag values are quoted.
          args:
            - --config
            - "${DGD_CONFIG_FILE}"
            # Results land on the output-volume mount declared below.
            - --output-dir
            - /data/profiling_results
            - --namespace
            - "${NAMESPACE}"
            - --backend
            - vllm
            - --min-num-gpus-per-engine
            - "1"
            - --max-num-gpus-per-engine
            - "8"
            # NOTE(review): presumably isl/osl are input/output sequence
            # lengths and ttft/itl are latency targets; confirm meaning and
            # units against the profiler's CLI help.
            - --isl
            - "3000"
            - --osl
            - "150"
            - --ttft
            - "200"
            - --itl
            - "20"
          volumeMounts:
            - name: output-volume
              mountPath: /data
      # With restartPolicy Never and backoffLimit 0, a failed pod is neither
      # restarted in place nor retried by the Job controller.
      restartPolicy: Never
      volumes:
        - name: output-volume
          persistentVolumeClaim:
            claimName: dynamo-pvc
  backoffLimit: 0