Unverified commit 58d2699d, authored by julienmancuso, committed by GitHub
Browse files

feat: Add support for model express url injection (#2769)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 6c539fbd
...@@ -35,7 +35,7 @@ dependencies: ...@@ -35,7 +35,7 @@ dependencies:
repository: "https://charts.bitnami.com/bitnami" repository: "https://charts.bitnami.com/bitnami"
condition: etcd.enabled condition: etcd.enabled
- name: kai-scheduler - name: kai-scheduler
version: v0.8.1 version: v0.8.4
repository: oci://ghcr.io/nvidia/kai-scheduler repository: oci://ghcr.io/nvidia/kai-scheduler
condition: kai-scheduler.enabled condition: kai-scheduler.enabled
- name: grove-charts - name: grove-charts
......
...@@ -48,7 +48,7 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure ...@@ -48,7 +48,7 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure
| https://charts.bitnami.com/bitnami | etcd | 11.1.0 | | https://charts.bitnami.com/bitnami | etcd | 11.1.0 |
| https://nats-io.github.io/k8s/helm/charts/ | nats | 1.3.2 | | https://nats-io.github.io/k8s/helm/charts/ | nats | 1.3.2 |
| oci://ghcr.io/nvidia/grove | grove(grove-charts) | v0.0.0-6e30275 | | oci://ghcr.io/nvidia/grove | grove(grove-charts) | v0.0.0-6e30275 |
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.8.1 | | oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.8.4 |
## Values ## Values
...@@ -57,6 +57,8 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure ...@@ -57,6 +57,8 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure
| dynamo-operator.enabled | bool | `true` | Whether to enable the Dynamo Kubernetes operator deployment | | dynamo-operator.enabled | bool | `true` | Whether to enable the Dynamo Kubernetes operator deployment |
| dynamo-operator.natsAddr | string | `""` | NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port" | | dynamo-operator.natsAddr | string | `""` | NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port" |
| dynamo-operator.etcdAddr | string | `""` | etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port" | | dynamo-operator.etcdAddr | string | `""` | etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port" |
| dynamo-operator.modelExpressURL | string | `""` | URL for the Model Express server if not deployed by this helm chart. This is ignored if Model Express server is installed by this helm chart (global.model-express.enabled is true). |
| dynamo-operator.namespaceRestriction | object | `{"enabled":true,"targetNamespace":null}` | Namespace access controls for the operator |
| dynamo-operator.namespaceRestriction.enabled | bool | `true` | Whether to restrict operator to specific namespaces | | dynamo-operator.namespaceRestriction.enabled | bool | `true` | Whether to restrict operator to specific namespaces |
| dynamo-operator.namespaceRestriction.targetNamespace | string | `nil` | Target namespace for operator deployment (leave empty for current namespace) | | dynamo-operator.namespaceRestriction.targetNamespace | string | `nil` | Target namespace for operator deployment (leave empty for current namespace) |
| dynamo-operator.controllerManager.tolerations | list | `[]` | Node tolerations for controller manager pods | | dynamo-operator.controllerManager.tolerations | list | `[]` | Node tolerations for controller manager pods |
......
...@@ -104,6 +104,9 @@ spec: ...@@ -104,6 +104,9 @@ spec:
{{- if .Values.dynamo.groveTerminationDelay }} {{- if .Values.dynamo.groveTerminationDelay }}
- --grove-termination-delay={{ .Values.dynamo.groveTerminationDelay }} - --grove-termination-delay={{ .Values.dynamo.groveTerminationDelay }}
{{- end }} {{- end }}
{{- if .Values.modelExpressURL }}
- --model-express-url={{ .Values.modelExpressURL }}
{{- end }}
command: command:
- /manager - /manager
env: env:
......
...@@ -112,3 +112,5 @@ metricsService: ...@@ -112,3 +112,5 @@ metricsService:
natsAddr: "" natsAddr: ""
etcdAddr: "" etcdAddr: ""
modelExpressURL: ""
...@@ -27,7 +27,9 @@ dynamo-operator: ...@@ -27,7 +27,9 @@ dynamo-operator:
# -- etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port" # -- etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port"
etcdAddr: "" etcdAddr: ""
# Namespace access controls for the operator # -- URL for the Model Express server if not deployed by this helm chart. This is ignored if Model Express server is installed by this helm chart (global.model-express.enabled is true).
modelExpressURL: ""
# -- Namespace access controls for the operator
namespaceRestriction: namespaceRestriction:
# -- Whether to restrict operator to specific namespaces # -- Whether to restrict operator to specific namespaces
enabled: true enabled: true
......
...@@ -23,6 +23,7 @@ import ( ...@@ -23,6 +23,7 @@ import (
"context" "context"
"crypto/tls" "crypto/tls"
"flag" "flag"
"net/url"
"os" "os"
"time" "time"
...@@ -130,6 +131,7 @@ func main() { ...@@ -130,6 +131,7 @@ func main() {
var ingressControllerTLSSecretName string var ingressControllerTLSSecretName string
var ingressHostSuffix string var ingressHostSuffix string
var groveTerminationDelay time.Duration var groveTerminationDelay time.Duration
var modelExpressURL string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false, flag.BoolVar(&enableLeaderElection, "leader-elect", false,
...@@ -157,12 +159,23 @@ func main() { ...@@ -157,12 +159,23 @@ func main() {
"The suffix to use for the ingress host") "The suffix to use for the ingress host")
flag.DurationVar(&groveTerminationDelay, "grove-termination-delay", consts.DefaultGroveTerminationDelay, flag.DurationVar(&groveTerminationDelay, "grove-termination-delay", consts.DefaultGroveTerminationDelay,
"The termination delay for Grove PodGangSets") "The termination delay for Grove PodGangSets")
flag.StringVar(&modelExpressURL, "model-express-url", "",
"URL of the Model Express server to inject into all pods")
opts := zap.Options{ opts := zap.Options{
Development: true, Development: true,
} }
opts.BindFlags(flag.CommandLine) opts.BindFlags(flag.CommandLine)
flag.Parse() flag.Parse()
// Validate modelExpressURL if provided
if modelExpressURL != "" {
if _, err := url.Parse(modelExpressURL); err != nil {
setupLog.Error(err, "invalid model-express-url provided", "url", modelExpressURL)
os.Exit(1)
}
setupLog.Info("Model Express URL configured", "url", modelExpressURL)
}
ctrlConfig := commonController.Config{ ctrlConfig := commonController.Config{
RestrictedNamespace: restrictedNamespace, RestrictedNamespace: restrictedNamespace,
Grove: commonController.GroveConfig{ Grove: commonController.GroveConfig{
...@@ -183,6 +196,7 @@ func main() { ...@@ -183,6 +196,7 @@ func main() {
IngressControllerTLSSecret: ingressControllerTLSSecretName, IngressControllerTLSSecret: ingressControllerTLSSecretName,
IngressHostSuffix: ingressHostSuffix, IngressHostSuffix: ingressHostSuffix,
}, },
ModelExpressURL: modelExpressURL,
} }
mainCtx := ctrl.SetupSignalHandler() mainCtx := ctrl.SetupSignalHandler()
......
...@@ -56,6 +56,8 @@ type Config struct { ...@@ -56,6 +56,8 @@ type Config struct {
EtcdAddress string EtcdAddress string
NatsAddress string NatsAddress string
IngressConfig IngressConfig IngressConfig IngressConfig
// ModelExpressURL is the URL of the Model Express server to inject into all pods
ModelExpressURL string
} }
type IngressConfig struct { type IngressConfig struct {
......
...@@ -673,6 +673,13 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller ...@@ -673,6 +673,13 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
Value: controllerConfig.EtcdAddress, Value: controllerConfig.EtcdAddress,
}) })
} }
if controllerConfig.ModelExpressURL != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "MODEL_EXPRESS_URL",
Value: controllerConfig.ModelExpressURL,
})
}
} }
// GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove // GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove
......
...@@ -1065,8 +1065,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1065,8 +1065,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
args: args{ args: args{
ctx: context.Background(), ctx: context.Background(),
controllerConfig: controller_common.Config{ controllerConfig: controller_common.Config{
EtcdAddress: "etcd-address", EtcdAddress: "etcd-address",
NatsAddress: "nats-address", NatsAddress: "nats-address",
ModelExpressURL: "model-express-url",
Grove: controller_common.GroveConfig{ Grove: controller_common.GroveConfig{
TerminationDelay: 15 * time.Minute, TerminationDelay: 15 * time.Minute,
}, },
...@@ -1343,6 +1344,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1343,6 +1344,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Name: "DYN_PARENT_DGD_K8S_NAMESPACE", Name: "DYN_PARENT_DGD_K8S_NAMESPACE",
Value: "test-namespace", Value: "test-namespace",
}, },
{
Name: "MODEL_EXPRESS_URL",
Value: "model-express-url",
},
}, },
Resources: corev1.ResourceRequirements{ Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{ Requests: corev1.ResourceList{
...@@ -1474,6 +1479,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) { ...@@ -1474,6 +1479,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Name: "DYN_PARENT_DGD_K8S_NAMESPACE", Name: "DYN_PARENT_DGD_K8S_NAMESPACE",
Value: "test-namespace", Value: "test-namespace",
}, },
{
Name: "MODEL_EXPRESS_URL",
Value: "model-express-url",
},
}, },
Resources: corev1.ResourceRequirements{ Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{ Requests: corev1.ResourceList{
......
...@@ -34,7 +34,7 @@ services: ...@@ -34,7 +34,7 @@ services:
- monitoring - monitoring
etcd-server: etcd-server:
image: bitnami/etcd:3.6.1 image: bitnamilegacy/etcd:3.6.1
environment: environment:
- ALLOW_NONE_AUTHENTICATION=yes - ALLOW_NONE_AUTHENTICATION=yes
ports: ports:
......
...@@ -73,6 +73,14 @@ helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace ...@@ -73,6 +73,14 @@ helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace
--set "kai-scheduler.enabled=true" --set "kai-scheduler.enabled=true"
``` ```
> [!TIP]
> By default, Model Express Server is not used.
> If you wish to use an existing Model Express Server, set `dynamo-operator.modelExpressURL` to that server's URL in the `helm install` command:
```bash
--set "dynamo-operator.modelExpressURL=http://model-express-server.model-express.svc.cluster.local:8080"
```
[Verify Installation](#verify-installation) [Verify Installation](#verify-installation)
...@@ -189,3 +197,4 @@ kubectl create secret generic hf-token-secret \ ...@@ -189,3 +197,4 @@ kubectl create secret generic hf-token-secret \
- [GKE-specific setup](gke_setup.md) - [GKE-specific setup](gke_setup.md)
- [Create custom deployments](create_deployment.md) - [Create custom deployments](create_deployment.md)
- [Dynamo Operator details](dynamo_operator.md) - [Dynamo Operator details](dynamo_operator.md)
- [Model Express Server details](https://github.com/ai-dynamo/modelexpress)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment