Unverified Commit 73a4ab31 authored by atchernych's avatar atchernych Committed by GitHub
Browse files

chore: Optimizations for epp interface compliance (#6581)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent d688aa68
......@@ -10549,6 +10549,181 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
frontendSidecar:
description: |-
FrontendSidecar configures an auto-generated frontend sidecar container.
When specified, the operator injects a fully configured frontend container
with all standard Dynamo environment variables, health probes, and ports.
This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE)
properties:
args:
description: |-
Args overrides the default frontend arguments. When specified, these replace
the default ["-m", "dynamo.frontend"] entirely.
For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments.
items:
type: string
type: array
envFromSecret:
description: |-
EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the frontend sidecar container.
type: string
envs:
description: |-
Envs defines additional environment variables for the frontend sidecar.
These are merged with (and can override) the auto-generated Dynamo env vars.
items:
description: EnvVar represents an environment variable present in a Container.
properties:
name:
description: |-
Name of the environment variable.
May consist of any printable ASCII characters except '='.
type: string
value:
description: |-
Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in the container and
any service environment variables. If a variable cannot be resolved,
the reference in the input string will be unchanged. Double $$ are reduced
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless of whether the variable
exists or not.
Defaults to "".
type: string
valueFrom:
description: Source for the environment variable's value. Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the ConfigMap or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
description: |-
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
properties:
apiVersion:
description: Version of the schema the FieldPath is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the specified API version.
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
fileKeyRef:
description: |-
FileKeyRef selects a key of the env file.
Requires the EnvFiles feature gate to be enabled.
properties:
key:
description: |-
The key within the env file. An invalid key will prevent the pod from starting.
The keys defined within a source may consist of any printable ASCII characters except '='.
During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
type: string
optional:
default: false
description: |-
Specify whether the file or its key must be defined. If the file or key
does not exist, then the env var is not published.
If optional is set to true and the specified key does not exist,
the environment variable will not be set in the Pod's containers.
If optional is set to false and the specified key does not exist,
an error will be returned during Pod creation.
type: boolean
path:
description: |-
The path within the volume from which to select the file.
Must be relative and may not contain the '..' path or start with '..'.
type: string
volumeName:
description: The name of the volume mount containing the env file.
type: string
required:
- key
- path
- volumeName
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
description: |-
Selects a resource of the container: only resources limits and requests
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
properties:
containerName:
description: 'Container name: required for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
description: Selects a key of a secret in the pod's namespace
properties:
key:
description: The key of the secret to select from. Must be a valid secret key.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
image:
description: Image is the container image for the frontend sidecar.
type: string
required:
- image
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
......
......@@ -10758,6 +10758,181 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
frontendSidecar:
description: |-
FrontendSidecar configures an auto-generated frontend sidecar container.
When specified, the operator injects a fully configured frontend container
with all standard Dynamo environment variables, health probes, and ports.
This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE)
properties:
args:
description: |-
Args overrides the default frontend arguments. When specified, these replace
the default ["-m", "dynamo.frontend"] entirely.
For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments.
items:
type: string
type: array
envFromSecret:
description: |-
EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the frontend sidecar container.
type: string
envs:
description: |-
Envs defines additional environment variables for the frontend sidecar.
These are merged with (and can override) the auto-generated Dynamo env vars.
items:
description: EnvVar represents an environment variable present in a Container.
properties:
name:
description: |-
Name of the environment variable.
May consist of any printable ASCII characters except '='.
type: string
value:
description: |-
Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in the container and
any service environment variables. If a variable cannot be resolved,
the reference in the input string will be unchanged. Double $$ are reduced
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless of whether the variable
exists or not.
Defaults to "".
type: string
valueFrom:
description: Source for the environment variable's value. Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the ConfigMap or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
description: |-
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
properties:
apiVersion:
description: Version of the schema the FieldPath is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the specified API version.
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
fileKeyRef:
description: |-
FileKeyRef selects a key of the env file.
Requires the EnvFiles feature gate to be enabled.
properties:
key:
description: |-
The key within the env file. An invalid key will prevent the pod from starting.
The keys defined within a source may consist of any printable ASCII characters except '='.
During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
type: string
optional:
default: false
description: |-
Specify whether the file or its key must be defined. If the file or key
does not exist, then the env var is not published.
If optional is set to true and the specified key does not exist,
the environment variable will not be set in the Pod's containers.
If optional is set to false and the specified key does not exist,
an error will be returned during Pod creation.
type: boolean
path:
description: |-
The path within the volume from which to select the file.
Must be relative and may not contain the '..' path or start with '..'.
type: string
volumeName:
description: The name of the volume mount containing the env file.
type: string
required:
- key
- path
- volumeName
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
description: |-
Selects a resource of the container: only resources limits and requests
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
properties:
containerName:
description: 'Container name: required for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
description: Selects a key of a secret in the pod's namespace
properties:
key:
description: The key of the secret to select from. Must be a valid secret key.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
image:
description: Image is the container image for the frontend sidecar.
type: string
required:
- image
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
......
......@@ -293,7 +293,7 @@ helm: manifests kustomize helmify
$(KUSTOMIZE) build config/default | $(HELMIFY) -image-pull-secrets charts/dynamo-kubernetes-operator
######################### CRD Reference Docs
CRD_REF_DOCS_VERSION ?= latest
CRD_REF_DOCS_VERSION ?= v0.3.0
CRD_REF_DOCS ?= $(LOCALBIN)/crd-ref-docs
.PHONY: crd-ref-docs
......
......@@ -131,6 +131,13 @@ type DynamoComponentDeploymentSharedSpec struct {
// +optional
EPPConfig *EPPConfig `json:"eppConfig,omitempty"`
// FrontendSidecar configures an auto-generated frontend sidecar container.
// When specified, the operator injects a fully configured frontend container
// with all standard Dynamo environment variables, health probes, and ports.
// This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE)
// +optional
FrontendSidecar *FrontendSidecarSpec `json:"frontendSidecar,omitempty"`
// Checkpoint configures container checkpointing for this service.
// When enabled, pods can be restored from a checkpoint files for faster cold start.
// +optional
......@@ -358,6 +365,31 @@ type ModelReference struct {
Revision string `json:"revision,omitempty"`
}
// FrontendSidecarSpec configures the auto-generated frontend sidecar container.
// The operator uses these fields together with built-in frontend defaults (command, probes, ports,
// and Dynamo env vars) to produce a fully configured sidecar container.
//
// How the fields are consumed by generateFrontendSidecar:
//   - Image is always written to the sidecar container's image.
//   - Args replaces the default container args only when the list is non-empty.
//   - EnvFromSecret, when set, is appended to the container as a secret-backed envFrom source.
//   - Envs, when non-empty, is combined with the default env list via MergeEnvs.
//
// The generated container is named with the FrontendSidecarContainerName constant.
type FrontendSidecarSpec struct {
// Image is the container image for the frontend sidecar.
// +kubebuilder:validation:Required
Image string `json:"image"`
// Args overrides the default frontend arguments. When specified, these replace
// the default ["-m", "dynamo.frontend"] entirely.
// For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments.
// +optional
Args []string `json:"args,omitempty"`
// EnvFromSecret references a Secret whose key/value pairs will be exposed as
// environment variables in the frontend sidecar container.
// +optional
EnvFromSecret *string `json:"envFromSecret,omitempty"`
// Envs defines additional environment variables for the frontend sidecar.
// These are merged with (and can override) the auto-generated Dynamo env vars.
// +optional
Envs []corev1.EnvVar `json:"envs,omitempty"`
}
// EPPConfig contains configuration for EPP (Endpoint Picker Plugin) components.
// EPP is responsible for intelligent endpoint selection and KV-aware routing.
type EPPConfig struct {
......
......@@ -537,6 +537,11 @@ func (in *DynamoComponentDeploymentSharedSpec) DeepCopyInto(out *DynamoComponent
*out = new(EPPConfig)
(*in).DeepCopyInto(*out)
}
if in.FrontendSidecar != nil {
in, out := &in.FrontendSidecar, &out.FrontendSidecar
*out = new(FrontendSidecarSpec)
(*in).DeepCopyInto(*out)
}
if in.Checkpoint != nil {
in, out := &in.Checkpoint, &out.Checkpoint
*out = new(ServiceCheckpointConfig)
......@@ -1185,6 +1190,38 @@ func (in *ExtraPodSpec) DeepCopy() *ExtraPodSpec {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FrontendSidecarSpec) DeepCopyInto(out *FrontendSidecarSpec) {
// Shallow copy first: this is sufficient for Image (a plain string). The slice and
// pointer fields still alias the receiver's memory until they are re-allocated below;
// fields that are nil on the receiver simply stay nil on the copy.
*out = *in
if in.Args != nil {
// Allocate a new backing array so the copy's Args no longer shares storage with in.
in, out := &in.Args, &out.Args
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.EnvFromSecret != nil {
// Allocate a new string and copy the pointed-to value rather than the pointer.
in, out := &in.EnvFromSecret, &out.EnvFromSecret
*out = new(string)
**out = **in
}
if in.Envs != nil {
// Each EnvVar is copied through its own DeepCopyInto, element by element.
in, out := &in.Envs, &out.Envs
*out = make([]v1.EnvVar, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FrontendSidecarSpec.
func (in *FrontendSidecarSpec) DeepCopy() *FrontendSidecarSpec {
// A nil receiver yields a nil copy instead of an allocated empty spec.
if in == nil {
return nil
}
// Allocate the destination and delegate the field-by-field copy to DeepCopyInto.
out := new(FrontendSidecarSpec)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *IngressSpec) DeepCopyInto(out *IngressSpec) {
*out = *in
......
......@@ -10549,6 +10549,181 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
frontendSidecar:
description: |-
FrontendSidecar configures an auto-generated frontend sidecar container.
When specified, the operator injects a fully configured frontend container
with all standard Dynamo environment variables, health probes, and ports.
This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE)
properties:
args:
description: |-
Args overrides the default frontend arguments. When specified, these replace
the default ["-m", "dynamo.frontend"] entirely.
For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments.
items:
type: string
type: array
envFromSecret:
description: |-
EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the frontend sidecar container.
type: string
envs:
description: |-
Envs defines additional environment variables for the frontend sidecar.
These are merged with (and can override) the auto-generated Dynamo env vars.
items:
description: EnvVar represents an environment variable present in a Container.
properties:
name:
description: |-
Name of the environment variable.
May consist of any printable ASCII characters except '='.
type: string
value:
description: |-
Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in the container and
any service environment variables. If a variable cannot be resolved,
the reference in the input string will be unchanged. Double $$ are reduced
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless of whether the variable
exists or not.
Defaults to "".
type: string
valueFrom:
description: Source for the environment variable's value. Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the ConfigMap or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
description: |-
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
properties:
apiVersion:
description: Version of the schema the FieldPath is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the specified API version.
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
fileKeyRef:
description: |-
FileKeyRef selects a key of the env file.
Requires the EnvFiles feature gate to be enabled.
properties:
key:
description: |-
The key within the env file. An invalid key will prevent the pod from starting.
The keys defined within a source may consist of any printable ASCII characters except '='.
During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
type: string
optional:
default: false
description: |-
Specify whether the file or its key must be defined. If the file or key
does not exist, then the env var is not published.
If optional is set to true and the specified key does not exist,
the environment variable will not be set in the Pod's containers.
If optional is set to false and the specified key does not exist,
an error will be returned during Pod creation.
type: boolean
path:
description: |-
The path within the volume from which to select the file.
Must be relative and may not contain the '..' path or start with '..'.
type: string
volumeName:
description: The name of the volume mount containing the env file.
type: string
required:
- key
- path
- volumeName
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
description: |-
Selects a resource of the container: only resources limits and requests
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
properties:
containerName:
description: 'Container name: required for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
description: Selects a key of a secret in the pod's namespace
properties:
key:
description: The key of the secret to select from. Must be a valid secret key.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
image:
description: Image is the container image for the frontend sidecar.
type: string
required:
- image
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
......
......@@ -10758,6 +10758,181 @@ spec:
x-kubernetes-list-type: map
required: []
type: object
frontendSidecar:
description: |-
FrontendSidecar configures an auto-generated frontend sidecar container.
When specified, the operator injects a fully configured frontend container
with all standard Dynamo environment variables, health probes, and ports.
This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE)
properties:
args:
description: |-
Args overrides the default frontend arguments. When specified, these replace
the default ["-m", "dynamo.frontend"] entirely.
For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments.
items:
type: string
type: array
envFromSecret:
description: |-
EnvFromSecret references a Secret whose key/value pairs will be exposed as
environment variables in the frontend sidecar container.
type: string
envs:
description: |-
Envs defines additional environment variables for the frontend sidecar.
These are merged with (and can override) the auto-generated Dynamo env vars.
items:
description: EnvVar represents an environment variable present in a Container.
properties:
name:
description: |-
Name of the environment variable.
May consist of any printable ASCII characters except '='.
type: string
value:
description: |-
Variable references $(VAR_NAME) are expanded
using the previously defined environment variables in the container and
any service environment variables. If a variable cannot be resolved,
the reference in the input string will be unchanged. Double $$ are reduced
to a single $, which allows for escaping the $(VAR_NAME) syntax: i.e.
"$$(VAR_NAME)" will produce the string literal "$(VAR_NAME)".
Escaped references will never be expanded, regardless of whether the variable
exists or not.
Defaults to "".
type: string
valueFrom:
description: Source for the environment variable's value. Cannot be used if value is not empty.
properties:
configMapKeyRef:
description: Selects a key of a ConfigMap.
properties:
key:
description: The key to select.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the ConfigMap or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
fieldRef:
description: |-
Selects a field of the pod: supports metadata.name, metadata.namespace, `metadata.labels['<KEY>']`, `metadata.annotations['<KEY>']`,
spec.nodeName, spec.serviceAccountName, status.hostIP, status.podIP, status.podIPs.
properties:
apiVersion:
description: Version of the schema the FieldPath is written in terms of, defaults to "v1".
type: string
fieldPath:
description: Path of the field to select in the specified API version.
type: string
required:
- fieldPath
type: object
x-kubernetes-map-type: atomic
fileKeyRef:
description: |-
FileKeyRef selects a key of the env file.
Requires the EnvFiles feature gate to be enabled.
properties:
key:
description: |-
The key within the env file. An invalid key will prevent the pod from starting.
The keys defined within a source may consist of any printable ASCII characters except '='.
During Alpha stage of the EnvFiles feature gate, the key size is limited to 128 characters.
type: string
optional:
default: false
description: |-
Specify whether the file or its key must be defined. If the file or key
does not exist, then the env var is not published.
If optional is set to true and the specified key does not exist,
the environment variable will not be set in the Pod's containers.
If optional is set to false and the specified key does not exist,
an error will be returned during Pod creation.
type: boolean
path:
description: |-
The path within the volume from which to select the file.
Must be relative and may not contain the '..' path or start with '..'.
type: string
volumeName:
description: The name of the volume mount containing the env file.
type: string
required:
- key
- path
- volumeName
type: object
x-kubernetes-map-type: atomic
resourceFieldRef:
description: |-
Selects a resource of the container: only resources limits and requests
(limits.cpu, limits.memory, limits.ephemeral-storage, requests.cpu, requests.memory and requests.ephemeral-storage) are currently supported.
properties:
containerName:
description: 'Container name: required for volumes, optional for env vars'
type: string
divisor:
anyOf:
- type: integer
- type: string
description: Specifies the output format of the exposed resources, defaults to "1"
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
resource:
description: 'Required: resource to select'
type: string
required:
- resource
type: object
x-kubernetes-map-type: atomic
secretKeyRef:
description: Selects a key of a secret in the pod's namespace
properties:
key:
description: The key of the secret to select from. Must be a valid secret key.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
type: object
required:
- name
type: object
type: array
image:
description: Image is the container image for the frontend sidecar.
type: string
required:
- image
type: object
globalDynamoNamespace:
description: GlobalDynamoNamespace indicates that the Component will be placed in the global Dynamo namespace
type: boolean
......
......@@ -121,7 +121,8 @@ const (
GroveRoleSuffixLeader = "ldr"
GroveRoleSuffixWorker = "wkr"
MainContainerName = "main"
MainContainerName = "main"
FrontendSidecarContainerName = "sidecar-frontend"
RestartAnnotation = "nvidia.com/restartAt"
......
......@@ -505,6 +505,18 @@ type SecretsRetriever interface {
GetSecrets(namespace, registry string) ([]string, error)
}
// resolveImagePullSecrets asks the retriever for the secret names associated with
// the given namespace and image and wraps each name in a LocalObjectReference.
//
// The image string is passed as the retriever's second (registry) argument.
// The lookup is best-effort: if the retriever reports an error, the error is
// dropped and nil is returned. On success the result is a non-nil slice with one
// reference per returned name, in the same order.
func resolveImagePullSecrets(retriever SecretsRetriever, namespace, image string) []corev1.LocalObjectReference {
	secretNames, lookupErr := retriever.GetSecrets(namespace, image)
	if lookupErr != nil {
		// Deliberately swallowed: a failed lookup means "no pull secrets".
		return nil
	}

	pullSecrets := make([]corev1.LocalObjectReference, len(secretNames))
	for idx := range secretNames {
		pullSecrets[idx] = corev1.LocalObjectReference{Name: secretNames[idx]}
	}
	return pullSecrets
}
// applyCliqueStartupDependencies configures StartsAfter dependencies for cliques in a PodCliqueSet
// based on the backend framework and multinode deployment patterns.
//
......@@ -1049,12 +1061,7 @@ func GenerateBasePodSpec(
imagePullSecrets := []corev1.LocalObjectReference{}
if !shouldDisableImagePullSecret && secretsRetriever != nil && component.ExtraPodSpec != nil && component.ExtraPodSpec.MainContainer != nil && component.ExtraPodSpec.MainContainer.Image != "" {
secretsName, err := secretsRetriever.GetSecrets(namespace, component.ExtraPodSpec.MainContainer.Image)
if err == nil {
for _, secretName := range secretsName {
imagePullSecrets = append(imagePullSecrets, corev1.LocalObjectReference{Name: secretName})
}
}
imagePullSecrets = resolveImagePullSecrets(secretsRetriever, namespace, component.ExtraPodSpec.MainContainer.Image)
}
if component.EnvFromSecret != nil {
container.EnvFrom = append(container.EnvFrom, corev1.EnvFromSource{
......@@ -1170,6 +1177,22 @@ func GenerateBasePodSpec(
return nil, fmt.Errorf("failed to inject checkpoint config: %w", err)
}
// Inject auto-generated frontend sidecar if configured
if component.FrontendSidecar != nil {
sidecar, err := generateFrontendSidecar(component.FrontendSidecar, componentContext, operatorConfig)
if err != nil {
return nil, fmt.Errorf("failed to generate frontend sidecar: %w", err)
}
podSpec.Containers = append(podSpec.Containers, sidecar)
if !shouldDisableImagePullSecret && secretsRetriever != nil {
podSpec.ImagePullSecrets = controller_common.AppendUniqueImagePullSecrets(
podSpec.ImagePullSecrets,
resolveImagePullSecrets(secretsRetriever, namespace, component.FrontendSidecar.Image),
)
}
}
return &podSpec, nil
}
......@@ -1205,6 +1228,54 @@ func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentShare
return componentContext
}
// generateFrontendSidecar builds a fully configured frontend sidecar container
// from the base container returned by NewFrontendDefaults().GetBaseContainer, so
// users do not have to hand-write Dynamo env vars, probes, and ports when running
// the frontend as a sidecar (e.g., GAIE deployments).
//
// Parameters:
//   - spec: user-facing sidecar configuration; must be non-nil.
//   - parentContext: context of the component the sidecar is attached to. Only the
//     parent graph deployment name/namespace, discovery backend, and Dynamo
//     namespace are carried over; the sidecar itself is always described as a
//     single-node component of type ComponentTypeFrontend.
//   - operatorConfig: forwarded unchanged to addStandardEnvVars.
//
// Returns the generated container, or an error when spec is nil or the frontend
// base container cannot be produced.
func generateFrontendSidecar(
	spec *v1alpha1.FrontendSidecarSpec,
	parentContext ComponentContext,
	operatorConfig *configv1alpha1.OperatorConfiguration,
) (corev1.Container, error) {
	// Guard against a nil spec so a caller mistake surfaces as an error, not a panic.
	if spec == nil {
		return corev1.Container{}, fmt.Errorf("frontend sidecar spec is nil")
	}

	frontendContext := ComponentContext{
		numberOfNodes:                  1,
		ComponentType:                  commonconsts.ComponentTypeFrontend,
		ParentGraphDeploymentName:      parentContext.ParentGraphDeploymentName,
		ParentGraphDeploymentNamespace: parentContext.ParentGraphDeploymentNamespace,
		DiscoveryBackend:               parentContext.DiscoveryBackend,
		DynamoNamespace:                parentContext.DynamoNamespace,
	}

	frontendDefaults := NewFrontendDefaults()
	container, err := frontendDefaults.GetBaseContainer(frontendContext)
	if err != nil {
		return corev1.Container{}, fmt.Errorf("failed to get frontend base container: %w", err)
	}

	container.Name = commonconsts.FrontendSidecarContainerName
	container.Image = spec.Image

	// User args replace the defaults only when at least one argument is given.
	// Copy the slice instead of assigning it: sharing the backing array with the
	// spec would let any later in-place change to the container args leak back
	// into the caller's (possibly shared) spec object.
	if len(spec.Args) > 0 {
		container.Args = append([]string(nil), spec.Args...)
	}

	if spec.EnvFromSecret != nil {
		container.EnvFrom = append(container.EnvFrom, corev1.EnvFromSource{
			SecretRef: &corev1.SecretEnvSource{
				LocalObjectReference: corev1.LocalObjectReference{Name: *spec.EnvFromSecret},
			},
		})
	}

	if len(spec.Envs) > 0 {
		container.Env = MergeEnvs(container.Env, spec.Envs)
	}

	// NOTE(review): the standard env vars are applied after the user envs are
	// merged. Confirm addStandardEnvVars does not overwrite user-supplied values,
	// since the API documentation says user envs can override generated ones.
	addStandardEnvVars(&container, operatorConfig)

	return container, nil
}
// GeneratePodSpecForComponent creates a PodSpec for Grove deployments (simplified wrapper)
func GeneratePodSpecForComponent(
component *v1alpha1.DynamoComponentDeploymentSharedSpec,
......
......@@ -6967,6 +6967,189 @@ func TestFrontendDefaults_NamespacePrefixEnvVar(t *testing.T) {
assert.True(t, found, "DYN_NAMESPACE_PREFIX should be set on frontend")
}
// TestGenerateBasePodSpec_FrontendSidecar checks that GenerateBasePodSpec injects
// the auto-generated frontend sidecar only when FrontendSidecar is set on the
// component, and that the injected container carries the expected name, image,
// command, args, env vars, envFrom sources, probes and ports.
//
// Every dereference of an optional field (probes, HTTPGet handlers, SecretRef,
// the first port) is gated on the preceding assertion's result so that a
// regression is reported as a normal test failure rather than a panic.
func TestGenerateBasePodSpec_FrontendSidecar(t *testing.T) {
	secretsRetriever := &mockSecretsRetriever{}
	controllerConfig := &configv1alpha1.OperatorConfiguration{}
	envFromSecret := "hf-token-secret"

	tests := []struct {
		name          string
		component     *v1alpha1.DynamoComponentDeploymentSharedSpec
		parentDGDName string
		namespace     string
		// wantSidecarCount is the expected total number of containers in the pod
		// spec (the main container plus the sidecar when one is injected).
		wantSidecarCount int
		wantSidecarName  string
		wantSidecarImage string
		// wantSidecarArgs is only compared when non-nil.
		wantSidecarArgs []string
		// wantSidecarEnvVars lists plain-value env vars that must be present
		// with exactly these values; other env vars are ignored.
		wantSidecarEnvVars map[string]string
		wantSidecarEnvFrom int
		wantSidecarProbes  bool
		wantSidecarPorts   bool
		wantErr            bool
	}{
		{
			name: "worker without frontendSidecar has no sidecar",
			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
				ComponentType: commonconsts.ComponentTypeWorker,
			},
			parentDGDName:    "test-dgd",
			namespace:        "test-ns",
			wantSidecarCount: 1, // only main container
		},
		{
			name: "worker with frontendSidecar gets auto-generated sidecar",
			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
				ComponentType: commonconsts.ComponentTypeWorker,
				FrontendSidecar: &v1alpha1.FrontendSidecarSpec{
					Image: "my-frontend:latest",
					Args:  []string{"-m", "dynamo.frontend", "--router-mode", "direct"},
				},
			},
			parentDGDName:    "test-dgd",
			namespace:        "test-ns",
			wantSidecarCount: 2,
			wantSidecarName:  commonconsts.FrontendSidecarContainerName,
			wantSidecarImage: "my-frontend:latest",
			wantSidecarArgs:  []string{"-m", "dynamo.frontend", "--router-mode", "direct"},
			wantSidecarEnvVars: map[string]string{
				"DYN_NAMESPACE":                "test-ns-test-dgd",
				"DYN_COMPONENT":                commonconsts.ComponentTypeFrontend,
				"DYN_DISCOVERY_BACKEND":        "kubernetes",
				"DYN_HTTP_PORT":                fmt.Sprintf("%d", commonconsts.DynamoServicePort),
				"DYN_PARENT_DGD_K8S_NAME":      "test-dgd",
				"DYN_PARENT_DGD_K8S_NAMESPACE": "test-ns",
			},
			wantSidecarProbes: true,
			wantSidecarPorts:  true,
		},
		{
			name: "frontendSidecar with envFromSecret",
			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
				ComponentType: commonconsts.ComponentTypeWorker,
				FrontendSidecar: &v1alpha1.FrontendSidecarSpec{
					Image:         "my-frontend:latest",
					EnvFromSecret: &envFromSecret,
				},
			},
			parentDGDName:      "test-dgd",
			namespace:          "test-ns",
			wantSidecarCount:   2,
			wantSidecarName:    commonconsts.FrontendSidecarContainerName,
			wantSidecarImage:   "my-frontend:latest",
			wantSidecarArgs:    []string{"-m", "dynamo.frontend"},
			wantSidecarEnvFrom: 1,
			wantSidecarProbes:  true,
			wantSidecarPorts:   true,
		},
		{
			name: "frontendSidecar with custom env vars",
			component: &v1alpha1.DynamoComponentDeploymentSharedSpec{
				ComponentType: commonconsts.ComponentTypeWorker,
				FrontendSidecar: &v1alpha1.FrontendSidecarSpec{
					Image: "my-frontend:latest",
					Envs: []corev1.EnvVar{
						{Name: "CUSTOM_VAR", Value: "custom_value"},
					},
				},
			},
			parentDGDName:    "test-dgd",
			namespace:        "test-ns",
			wantSidecarCount: 2,
			wantSidecarName:  commonconsts.FrontendSidecarContainerName,
			wantSidecarImage: "my-frontend:latest",
			wantSidecarEnvVars: map[string]string{
				"CUSTOM_VAR": "custom_value",
			},
			wantSidecarProbes: true,
			wantSidecarPorts:  true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			podSpec, err := GenerateBasePodSpec(
				tt.component,
				BackendFrameworkVLLM,
				secretsRetriever,
				tt.parentDGDName,
				tt.namespace,
				RoleMain,
				1,
				controllerConfig,
				commonconsts.MultinodeDeploymentTypeGrove,
				"test-service",
				nil,
			)
			if (err != nil) != tt.wantErr {
				t.Errorf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if tt.wantErr {
				return
			}

			// Stop on a count mismatch: indexing the last container below would
			// otherwise inspect the wrong container or panic on an empty slice.
			if !assert.Equal(t, tt.wantSidecarCount, len(podSpec.Containers),
				"expected %d containers, got %d", tt.wantSidecarCount, len(podSpec.Containers)) {
				return
			}
			if tt.wantSidecarCount <= 1 {
				return
			}

			// The frontend sidecar is the last container
			sidecar := podSpec.Containers[len(podSpec.Containers)-1]
			assert.Equal(t, tt.wantSidecarName, sidecar.Name, "sidecar container name")
			assert.Equal(t, tt.wantSidecarImage, sidecar.Image, "sidecar container image")
			if tt.wantSidecarArgs != nil {
				assert.Equal(t, tt.wantSidecarArgs, sidecar.Args, "sidecar args")
			}
			assert.Equal(t, []string{"python3"}, sidecar.Command, "sidecar command should be python3")

			if tt.wantSidecarEnvVars != nil {
				envVars := make(map[string]string, len(sidecar.Env))
				for _, env := range sidecar.Env {
					envVars[env.Name] = env.Value
				}
				for k, v := range tt.wantSidecarEnvVars {
					// Distinguish "missing" from "present but empty".
					got, ok := envVars[k]
					if assert.True(t, ok, "sidecar env var %s should be set", k) {
						assert.Equal(t, v, got, "sidecar env var %s", k)
					}
				}
			}

			if tt.wantSidecarEnvFrom > 0 &&
				assert.Equal(t, tt.wantSidecarEnvFrom, len(sidecar.EnvFrom), "sidecar envFrom count") &&
				assert.NotNil(t, sidecar.EnvFrom[0].SecretRef, "sidecar envFrom should reference a secret") {
				assert.Equal(t, envFromSecret, sidecar.EnvFrom[0].SecretRef.Name, "sidecar envFromSecret name")
			}

			if tt.wantSidecarProbes {
				if assert.NotNil(t, sidecar.LivenessProbe, "sidecar should have liveness probe") &&
					assert.NotNil(t, sidecar.LivenessProbe.HTTPGet, "sidecar liveness probe should be an HTTP GET") {
					assert.Equal(t, "/live", sidecar.LivenessProbe.HTTPGet.Path)
				}
				if assert.NotNil(t, sidecar.ReadinessProbe, "sidecar should have readiness probe") &&
					assert.NotNil(t, sidecar.ReadinessProbe.HTTPGet, "sidecar readiness probe should be an HTTP GET") {
					assert.Equal(t, "/health", sidecar.ReadinessProbe.HTTPGet.Path)
				}
			}

			if tt.wantSidecarPorts && assert.NotEmpty(t, sidecar.Ports, "sidecar should have ports") {
				assert.Equal(t, int32(commonconsts.DynamoServicePort), sidecar.Ports[0].ContainerPort)
			}

			// Verify POD_NAME/POD_NAMESPACE/POD_UID are set via downward API
			hasDownwardAPI := map[string]bool{"POD_NAME": false, "POD_NAMESPACE": false, "POD_UID": false}
			for _, env := range sidecar.Env {
				if _, ok := hasDownwardAPI[env.Name]; ok && env.ValueFrom != nil && env.ValueFrom.FieldRef != nil {
					hasDownwardAPI[env.Name] = true
				}
			}
			for name, found := range hasDownwardAPI {
				assert.True(t, found, "sidecar should have downward API env var %s", name)
			}
		})
	}
}
func TestPropagateDGDAnnotations(t *testing.T) {
tests := []struct {
name string
......
......@@ -113,6 +113,11 @@ func (v *SharedSpecValidator) Validate(ctx context.Context) (admission.Warnings,
v.fieldPath))
}
// Validate frontend sidecar container name conflicts
if err := v.validateFrontendSidecar(); err != nil {
return nil, err
}
// Validate service-level annotations
if err := v.validateServiceAnnotations(); err != nil {
return nil, err
......@@ -231,6 +236,25 @@ func (v *SharedSpecValidator) checkInferencePoolAPIAvailability(ctx context.Cont
return nil
}
// validateFrontendSidecar guards against a container-name clash between the
// operator-injected frontend sidecar and user-provided containers.
//
// It is a no-op unless the spec both requests a frontend sidecar and carries an
// extraPodSpec with a pod spec. Otherwise it returns an error, prefixed with the
// validator's field path, if any entry of extraPodSpec.containers is already
// named consts.FrontendSidecarContainerName.
func (v *SharedSpecValidator) validateFrontendSidecar() error {
	sidecarRequested := v.spec.FrontendSidecar != nil
	extra := v.spec.ExtraPodSpec
	if !sidecarRequested || extra == nil || extra.PodSpec == nil {
		return nil
	}

	reserved := consts.FrontendSidecarContainerName
	userContainers := extra.PodSpec.Containers
	for i := range userContainers {
		if userContainers[i].Name != reserved {
			continue
		}
		return fmt.Errorf(
			"%s: cannot inject frontend sidecar: a container named %q already exists in extraPodSpec.containers",
			v.fieldPath, reserved)
	}
	return nil
}
// validateServiceAnnotations validates known annotations on the service-level spec.
func (v *SharedSpecValidator) validateServiceAnnotations() error {
if v.spec.Annotations == nil {
......
......@@ -22,6 +22,8 @@ import (
"testing"
nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/operator/api/v1alpha1"
"github.com/ai-dynamo/dynamo/deploy/operator/internal/consts"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
)
......@@ -249,6 +251,54 @@ func TestSharedSpecValidator_Validate(t *testing.T) {
wantErr: true,
errMsg: `spec.services[decode].annotations[nvidia.com/vllm-distributed-executor-backend] has invalid value "invalid": must be "mp" or "ray"`,
},
{
name: "frontendSidecar with no extraPodSpec containers is valid",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
FrontendSidecar: &nvidiacomv1alpha1.FrontendSidecarSpec{
Image: "my-frontend:latest",
},
},
fieldPath: "spec.services[worker]",
calculatedNamespace: "default-my-dgd",
wantErr: false,
},
{
name: "frontendSidecar rejects duplicate container name in extraPodSpec",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
FrontendSidecar: &nvidiacomv1alpha1.FrontendSidecarSpec{
Image: "my-frontend:latest",
},
ExtraPodSpec: &nvidiacomv1alpha1.ExtraPodSpec{
PodSpec: &corev1.PodSpec{
Containers: []corev1.Container{
{Name: consts.FrontendSidecarContainerName, Image: "conflict:latest"},
},
},
},
},
fieldPath: "spec.services[worker]",
calculatedNamespace: "default-my-dgd",
wantErr: true,
errMsg: `spec.services[worker]: cannot inject frontend sidecar: a container named "sidecar-frontend" already exists in extraPodSpec.containers`,
},
{
name: "frontendSidecar with non-conflicting extraPodSpec containers is valid",
spec: &nvidiacomv1alpha1.DynamoComponentDeploymentSharedSpec{
FrontendSidecar: &nvidiacomv1alpha1.FrontendSidecarSpec{
Image: "my-frontend:latest",
},
ExtraPodSpec: &nvidiacomv1alpha1.ExtraPodSpec{
PodSpec: &corev1.PodSpec{
Containers: []corev1.Container{
{Name: "other-sidecar", Image: "other:latest"},
},
},
},
},
fieldPath: "spec.services[worker]",
calculatedNamespace: "default-my-dgd",
wantErr: false,
},
}
for _, tt := range tests {
......
......@@ -396,6 +396,7 @@ _Appears in:_
| `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | |
| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.<br />When enabled, replicas are managed via DGDSA and external autoscalers can scale<br />the service using the Scale subresource. When disabled, replicas can be modified directly. | | Optional: \{\} <br /> |
| `eppConfig` _[EPPConfig](#eppconfig)_ | EPPConfig defines EPP-specific configuration options for Endpoint Picker Plugin components.<br />Only applicable when ComponentType is "epp". | | Optional: \{\} <br /> |
| `frontendSidecar` _[FrontendSidecarSpec](#frontendsidecarspec)_ | FrontendSidecar configures an auto-generated frontend sidecar container.<br />When specified, the operator injects a fully configured frontend container<br />with all standard Dynamo environment variables, health probes, and ports.<br />This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE) | | Optional: \{\} <br /> |
| `checkpoint` _[ServiceCheckpointConfig](#servicecheckpointconfig)_ | Checkpoint configures container checkpointing for this service.<br />When enabled, pods can be restored from checkpoint files for faster cold start. | | Optional: \{\} <br /> |
......@@ -436,6 +437,7 @@ _Appears in:_
| `multinode` _[MultinodeSpec](#multinodespec)_ | Multinode is the configuration for multinode components. | | |
| `scalingAdapter` _[ScalingAdapter](#scalingadapter)_ | ScalingAdapter configures whether this service uses the DynamoGraphDeploymentScalingAdapter.<br />When enabled, replicas are managed via DGDSA and external autoscalers can scale<br />the service using the Scale subresource. When disabled, replicas can be modified directly. | | Optional: \{\} <br /> |
| `eppConfig` _[EPPConfig](#eppconfig)_ | EPPConfig defines EPP-specific configuration options for Endpoint Picker Plugin components.<br />Only applicable when ComponentType is "epp". | | Optional: \{\} <br /> |
| `frontendSidecar` _[FrontendSidecarSpec](#frontendsidecarspec)_ | FrontendSidecar configures an auto-generated frontend sidecar container.<br />When specified, the operator injects a fully configured frontend container<br />with all standard Dynamo environment variables, health probes, and ports.<br />This eliminates the need to manually specify these in extraPodSpec.containers. (GAIE) | | Optional: \{\} <br /> |
| `checkpoint` _[ServiceCheckpointConfig](#servicecheckpointconfig)_ | Checkpoint configures container checkpointing for this service.<br />When enabled, pods can be restored from checkpoint files for faster cold start. | | Optional: \{\} <br /> |
......@@ -789,6 +791,28 @@ _Appears in:_
| `mainContainer` _[Container](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#container-v1-core)_ | | | |
#### FrontendSidecarSpec
FrontendSidecarSpec configures the auto-generated frontend sidecar container.
The operator uses these fields together with built-in frontend defaults (command, probes, ports,
and Dynamo env vars) to produce a fully configured sidecar container.
_Appears in:_
- [DynamoComponentDeploymentSharedSpec](#dynamocomponentdeploymentsharedspec)
- [DynamoComponentDeploymentSpec](#dynamocomponentdeploymentspec)
| Field | Description | Default | Validation |
| --- | --- | --- | --- |
| `image` _string_ | Image is the container image for the frontend sidecar. | | Required: \{\} <br /> |
| `args` _string array_ | Args overrides the default frontend arguments. When specified, these replace<br />the default ["-m", "dynamo.frontend"] entirely.<br />For example, ["-m", "dynamo.frontend", "--router-mode", "direct"] for GAIE deployments. | | Optional: \{\} <br /> |
| `envFromSecret` _string_ | EnvFromSecret references a Secret whose key/value pairs will be exposed as<br />environment variables in the frontend sidecar container. | | Optional: \{\} <br /> |
| `envs` _[EnvVar](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#envvar-v1-core) array_ | Envs defines additional environment variables for the frontend sidecar.<br />These are merged with (and can override) the auto-generated Dynamo env vars. | | Optional: \{\} <br /> |
#### IngressSpec
......
......@@ -123,11 +123,22 @@ make info # Check image tag
### 5. Deploy
We recommend deploying Inference Gateway's Endpoint Picker as a Dynamo operator's managed component. Alternatively,
you could deploy it as a standalone pod
you could deploy it as a standalone pod.
Note that when deploying Dynamo with the Inference Gateway Extension each worker must have the FrontEnd as a sidecar.
#### 5.a. Deploy as a DGD component (recommended)
We provide an example for the Qwen vLLM below.
You have to deploy the Dynamo Graph and the HttpRoute service.
For the HttpRoute service, make sure to specify the namespace where your gateway (e.g., kGateway) was deployed, as shown below.
```yaml
parentRefs:
- group: gateway.networking.k8s.io
kind: Gateway
name: inference-gateway
namespace: my-model # the namespace where your gateway is deployed.
```
```bash
cd <dynamo-source-root>
kubectl apply -f examples/backends/vllm/deploy/gaie/agg.yaml -n my-model
......@@ -158,18 +169,20 @@ kubectl apply -f recipes/llama-3-70b/vllm/disagg-single-node/gaie/http-route.yam
```
- When using GAIE the FrontEnd does not choose the workers. The routing is determined in the EPP.
- You must enable the flag in the FrontEnd cli as below.
```bash
command:
- python3
args:
- -m
- dynamo.frontend
- --router-mode
- direct
- The FrontEnd must run with `--router-mode direct` so that it respects the EPP's routing decisions passed via request headers.
- Use the `frontendSidecar` field on a worker service to have the operator automatically inject a fully configured frontend sidecar container with all required Dynamo env vars, probes, and ports:
```yaml
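frontendSidecar:
  image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
  # args replaces the default ["-m", "dynamo.frontend"] entirely,
  # so the module invocation must be repeated here.
  args:
    - -m
    - dynamo.frontend
    - --router-mode
    - direct
  envFromSecret: hf-token-secret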
```
- The pre-selected workers (decode and prefill in the case of disaggregated serving) are passed in the request headers.
- The flag assures the routing respects this selection.
- The `--router-mode direct` flag ensures the routing respects this selection.
**Startup Probe Timeout:** The EPP has a default startup probe timeout of 30 minutes (10s × 180 failures).
If your model takes longer to load, increase the `failureThreshold` in the EPP's `startupProbe`. For example,
......
......@@ -46,6 +46,14 @@ spec:
envFromSecret: hf-token-secret
sharedMemory:
size: 2Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
- --router-mode
- direct
envFromSecret: hf-token-secret
extraPodSpec:
mainContainer:
env:
......@@ -62,62 +70,6 @@ spec:
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
containers:
- name: frontend
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
command:
- python3
args:
- -m
- dynamo.frontend
- --router-mode
- direct
ports:
- containerPort: 8000
name: http
protocol: TCP
envFrom:
- secretRef:
name: hf-token-secret
env:
- name: DYNAMO_PORT
value: "8000"
- name: DYN_HTTP_PORT
value: "8000"
- name: DYN_NAMESPACE
value: my-model-qwen-agg
- name: DYN_COMPONENT
value: frontend
- name: DYN_DISCOVERY_BACKEND
value: kubernetes
- name: DYN_PARENT_DGD_K8S_NAME
value: qwen-agg
- name: DYN_PARENT_DGD_K8S_NAMESPACE
value: my-model
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
livenessProbe:
httpGet:
path: /live
port: http
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
periodSeconds: 10
replicas: 1
resources:
limits:
......
......@@ -24,7 +24,7 @@ spec:
- group: gateway.networking.k8s.io
kind: Gateway
name: inference-gateway
namespace: my-model
namespace: my-model # the namespace where your gateway is deployed.
rules:
- backendRefs:
- group: inference.networking.k8s.io
......
......@@ -59,6 +59,14 @@ spec:
mountPoint: /opt/models
sharedMemory:
size: 20Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
- --router-mode
- direct
envFromSecret: hf-token-secret
extraPodSpec:
mainContainer:
env:
......@@ -77,64 +85,6 @@ spec:
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
# Frontend sidecar: receives requests from kGateway on port 8000
# and routes them to the vLLM worker in the same pod
containers:
- name: frontend
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
command:
- python3
args:
- -m
- dynamo.frontend
- --router-mode
- direct
ports:
- containerPort: 8000
name: http
protocol: TCP
envFrom:
- secretRef:
name: hf-token-secret
env:
- name: DYNAMO_PORT
value: "8000"
- name: DYN_HTTP_PORT
value: "8000"
- name: DYN_NAMESPACE
value: my-model-vllm-agg
- name: DYN_COMPONENT
value: frontend
- name: DYN_DISCOVERY_BACKEND
value: kubernetes
- name: DYN_PARENT_DGD_K8S_NAME
value: llama3-70b-agg
- name: DYN_PARENT_DGD_K8S_NAMESPACE
value: my-model
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
livenessProbe:
httpGet:
path: /live
port: http
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
periodSeconds: 10
replicas: 1
resources:
limits:
......
......@@ -67,6 +67,14 @@ spec:
mountPoint: /opt/models
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
- --router-mode
- direct
envFromSecret: hf-token-secret
extraPodSpec:
affinity:
podAffinity:
......@@ -95,64 +103,6 @@ spec:
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
# Frontend sidecar: receives requests from kGateway on port 8000
# and routes them to the vLLM worker in the same pod
containers:
- name: frontend
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
command:
- python3
args:
- -m
- dynamo.frontend
- --router-mode
- direct
ports:
- containerPort: 8000
name: http
protocol: TCP
envFrom:
- secretRef:
name: hf-token-secret
env:
- name: DYNAMO_PORT
value: "8000"
- name: DYN_HTTP_PORT
value: "8000"
- name: DYN_NAMESPACE
value: a-epp-vllm-disagg
- name: DYN_COMPONENT
value: frontend
- name: DYN_DISCOVERY_BACKEND
value: kubernetes
- name: DYN_PARENT_DGD_K8S_NAME
value: llama3-70b-disagg
- name: DYN_PARENT_DGD_K8S_NAMESPACE
value: a-epp
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
livenessProbe:
httpGet:
path: /live
port: http
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
periodSeconds: 10
replicas: 2
resources:
limits:
......@@ -168,6 +118,14 @@ spec:
mountPoint: /opt/models
sharedMemory:
size: 80Gi
frontendSidecar:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
args:
- -m
- dynamo.frontend
- --router-mode
- direct
envFromSecret: hf-token-secret
extraPodSpec:
affinity:
podAffinity:
......@@ -196,64 +154,6 @@ spec:
- -c
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
workingDir: /workspace/examples/backends/vllm
# Frontend sidecar: receives requests from kGateway on port 8000
# and routes them to the vLLM worker in the same pod
containers:
- name: frontend
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
command:
- python3
args:
- -m
- dynamo.frontend
- --router-mode
- direct
ports:
- containerPort: 8000
name: http
protocol: TCP
envFrom:
- secretRef:
name: hf-token-secret
env:
- name: DYNAMO_PORT
value: "8000"
- name: DYN_HTTP_PORT
value: "8000"
- name: DYN_NAMESPACE
value: a-epp-vllm-disagg
- name: DYN_COMPONENT
value: frontend
- name: DYN_DISCOVERY_BACKEND
value: kubernetes
- name: DYN_PARENT_DGD_K8S_NAME
value: llama3-70b-disagg
- name: DYN_PARENT_DGD_K8S_NAMESPACE
value: a-epp
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
livenessProbe:
httpGet:
path: /live
port: http
initialDelaySeconds: 15
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
periodSeconds: 10
replicas: 1
resources:
limits:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment