"vscode:/vscode.git/clone" did not exist on "762fca25532f14320b62450d73941a8a795f88a7"
values.yaml 3.23 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# -- Default values for chart vllm
# -- Declare variables to be passed into your templates.

# -- Image configuration
image:
  # -- Image repository
  repository: "vllm/vllm-openai"
  # -- Image tag
  tag: "latest"
  # -- Container launch command, written as one argv element per list item
  command:
    - "vllm"
    - "serve"
    - "/data/"
    - "--served-model-name"
    - "opt-125m"
    - "--dtype"
    - "float32"
    - "--block-size"
    - "16"
    - "--host"
    - "0.0.0.0"
    - "--port"
    - "8000"

# -- Port exposed by the container (matches the "--port" argument in image.command)
containerPort: 8000
# -- Service name (null by default)
serviceName: null
# -- Port exposed by the service
servicePort: 80
# -- Additional ports configuration (empty list by default)
extraPorts: []

# -- Number of replicas
replicaCount: 1

# -- Deployment strategy configuration (empty mapping by default)
deploymentStrategy: {}

# -- Resource configuration; the default requests and limits below are identical
resources:
  requests:
    # -- Number of CPUs requested
    cpu: 4
    # -- Amount of system (CPU) memory requested
    memory: 16Gi
    # -- Number of GPUs requested
    nvidia.com/gpu: 1
  limits:
    # -- Maximum number of CPUs
    cpu: 4
    # -- Maximum amount of system (CPU) memory
    memory: 16Gi
    # -- Maximum number of GPUs
    nvidia.com/gpu: 1

# -- Type(s) of GPU used, given as a list
# NOTE(review): "TYPE_GPU_USED" looks like a placeholder to be replaced with a real GPU model name - confirm
gpuModels:
  - "TYPE_GPU_USED"

# -- Autoscaling configuration
autoscaling:
  # -- Enable autoscaling (disabled by default)
  enabled: false
  # -- Minimum number of replicas
  minReplicas: 1
  # -- Maximum number of replicas
  maxReplicas: 100
  # -- Target CPU utilization percentage for autoscaling
  targetCPUUtilizationPercentage: 80
  # Optional memory utilization target; left commented out, so it is unset by default
  # targetMemoryUtilizationPercentage: 80

# -- ConfigMap configuration (empty mapping by default)
configs: {}

# -- Secrets configuration (empty mapping by default)
secrets: {}

# -- External configuration (empty list by default)
externalConfigs: []

# -- Custom Objects configuration (empty list by default)
customObjects: []

# -- Disruption Budget Configuration (empty string by default)
# NOTE(review): presumably the maxUnavailable value of the PodDisruptionBudget - confirm against the templates
maxUnavailablePodDisruptionBudget: ""

# -- Additional configuration for the init container
extraInit:
  # -- Path of the model on the s3 which hosts model weights and config files
  s3modelpath: "relative_s3_model_path/opt-125m"
  # -- Storage size of the PVC
  # NOTE(review): presumably the volume that receives the model files from s3 - confirm against the templates
  pvcStorage: "1Gi"
  # NOTE(review): not documented here; presumably disables AWS EC2 metadata lookups in the init container - confirm
  awsEc2MetadataDisabled: true

# -- Additional containers configuration (empty list by default)
extraContainers: []

# -- Readiness probe configuration
readinessProbe:
  # -- HTTP request the Kubelet sends to the server
  httpGet:
    # -- Path to access on the HTTP server
    path: /health
    # -- Name or number of the container port the server listens on (same value as containerPort)
    port: 8000
  # -- Seconds to wait after the container has started before the first readiness probe
  initialDelaySeconds: 5
  # -- Interval, in seconds, between readiness probes
  periodSeconds: 5
  # -- Consecutive probe failures after which Kubernetes considers the container not ready
  failureThreshold: 3

# -- Liveness probe configuration
# NOTE(review): confirm initialDelaySeconds and failureThreshold leave enough time for the model to load
livenessProbe:
  # -- HTTP request the Kubelet sends to the server
  httpGet:
    # -- Path to access on the HTTP server
    path: /health
    # -- Name or number of the container port the server listens on (same value as containerPort)
    port: 8000
  # -- Seconds to wait after the container has started before the first liveness probe
  initialDelaySeconds: 15
  # -- Interval, in seconds, between liveness probes
  periodSeconds: 10
  # -- Consecutive probe failures after which Kubernetes considers the container not alive
  failureThreshold: 3

# -- Labels; the defaults set both environment and release to "test"
labels:
  environment: "test"
  release: "test"