# profile_sla_job.yaml
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Kubernetes Job that runs the SLA profiler script
# (/workspace/benchmarks/profiler/profile_sla.py) in a single container and
# writes its results to a persistent volume.
#
# The ${NAMESPACE}, ${DOCKER_IMAGE} and ${DGD_CONFIG_FILE} placeholders have to
# be substituted before this manifest is applied.
# NOTE(review): presumably rendered with envsubst or a similar tool -- confirm.
# Every templated scalar is quoted so that the substituted text is always
# parsed as a YAML string, even if it is empty, all digits or boolean-looking.
#
# NOTE(review): the ServiceAccount, Secret and PersistentVolumeClaim referenced
# below are not defined in this manifest; presumably they must already exist in
# the target namespace -- confirm.
apiVersion: batch/v1
kind: Job
metadata:
  name: profile-sla
  namespace: "${NAMESPACE}"
spec:
  template:
    spec:
      serviceAccountName: profile-sla-sa
      containers:
        - name: profile-sla
          image: "${DOCKER_IMAGE}"
          resources:
            requests:
              cpu: "1"
              memory: "2Gi"
            limits:
              cpu: "2"
              memory: "4Gi"
          env:
            # Token is read from key HF_TOKEN of the Secret hf-token-secret.
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: hf-token-secret
                  key: HF_TOKEN
            # NATS and etcd addresses are derived from the namespace name.
            - name: NATS_SERVER
              value: "nats://${NAMESPACE}-nats:4222"
            - name: ETCD_ENDPOINTS
              value: "${NAMESPACE}-etcd:2379"
          command: ["python", "/workspace/benchmarks/profiler/profile_sla.py"]
          args:
            - --config
            - "${DGD_CONFIG_FILE}"
            # Must match the mountPath of output-volume below so that the
            # results are written to the persistent volume.
            - --output-dir
            - /workspace/profiling_results
            - --namespace
            - "${NAMESPACE}"
          volumeMounts:
            - name: output-volume
              mountPath: /workspace/profiling_results
      # Never restart the container in place; together with backoffLimit: 0
      # below, a failed run is not retried.
      restartPolicy: Never
      volumes:
        - name: output-volume
          persistentVolumeClaim:
            claimName: profiling-pvc
  backoffLimit: 0