"launch/vscode:/vscode.git/clone" did not exist on "7fdc742e174415256361418c14fef036f0f26ddf"
Unverified Commit 58d2699d authored by julienmancuso, committed by GitHub
Browse files

feat: Add support for model express url injection (#2769)


Signed-off-by: Julien Mancuso <jmancuso@nvidia.com>
parent 6c539fbd
......@@ -35,7 +35,7 @@ dependencies:
repository: "https://charts.bitnami.com/bitnami"
condition: etcd.enabled
- name: kai-scheduler
version: v0.8.1
version: v0.8.4
repository: oci://ghcr.io/nvidia/kai-scheduler
condition: kai-scheduler.enabled
- name: grove-charts
......
......@@ -48,7 +48,7 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure
| https://charts.bitnami.com/bitnami | etcd | 11.1.0 |
| https://nats-io.github.io/k8s/helm/charts/ | nats | 1.3.2 |
| oci://ghcr.io/nvidia/grove | grove(grove-charts) | v0.0.0-6e30275 |
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.8.1 |
| oci://ghcr.io/nvidia/kai-scheduler | kai-scheduler | v0.8.4 |
## Values
......@@ -57,6 +57,8 @@ The Dynamo Platform Helm chart deploys the complete Dynamo Cloud infrastructure
| dynamo-operator.enabled | bool | `true` | Whether to enable the Dynamo Kubernetes operator deployment |
| dynamo-operator.natsAddr | string | `""` | NATS server address for operator communication (leave empty to use the bundled NATS chart). Format: "nats://hostname:port" |
| dynamo-operator.etcdAddr | string | `""` | etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port" |
| dynamo-operator.modelExpressURL | string | `""` | URL of an existing Model Express server that is not deployed by this Helm chart. Ignored when the Model Express server is installed by this Helm chart (`global.model-express.enabled` is `true`). |
| dynamo-operator.namespaceRestriction | object | `{"enabled":true,"targetNamespace":null}` | Namespace access controls for the operator |
| dynamo-operator.namespaceRestriction.enabled | bool | `true` | Whether to restrict operator to specific namespaces |
| dynamo-operator.namespaceRestriction.targetNamespace | string | `nil` | Target namespace for operator deployment (leave empty for current namespace) |
| dynamo-operator.controllerManager.tolerations | list | `[]` | Node tolerations for controller manager pods |
......
......@@ -104,6 +104,9 @@ spec:
{{- if .Values.dynamo.groveTerminationDelay }}
- --grove-termination-delay={{ .Values.dynamo.groveTerminationDelay }}
{{- end }}
{{- if .Values.modelExpressURL }}
- --model-express-url={{ .Values.modelExpressURL }}
{{- end }}
command:
- /manager
env:
......
......@@ -112,3 +112,5 @@ metricsService:
natsAddr: ""
etcdAddr: ""
modelExpressURL: ""
......@@ -27,7 +27,9 @@ dynamo-operator:
# -- etcd server address for operator state storage (leave empty to use the bundled etcd chart). Format: "http://hostname:port" or "https://hostname:port"
etcdAddr: ""
# Namespace access controls for the operator
# -- URL of an existing Model Express server that is not deployed by this Helm chart. Ignored when the Model Express server is installed by this Helm chart (global.model-express.enabled is true).
modelExpressURL: ""
# -- Namespace access controls for the operator
namespaceRestriction:
# -- Whether to restrict operator to specific namespaces
enabled: true
......
......@@ -23,6 +23,7 @@ import (
"context"
"crypto/tls"
"flag"
"net/url"
"os"
"time"
......@@ -130,6 +131,7 @@ func main() {
var ingressControllerTLSSecretName string
var ingressHostSuffix string
var groveTerminationDelay time.Duration
var modelExpressURL string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
......@@ -157,12 +159,23 @@ func main() {
"The suffix to use for the ingress host")
flag.DurationVar(&groveTerminationDelay, "grove-termination-delay", consts.DefaultGroveTerminationDelay,
"The termination delay for Grove PodGangSets")
flag.StringVar(&modelExpressURL, "model-express-url", "",
"URL of the Model Express server to inject into all pods")
opts := zap.Options{
Development: true,
}
opts.BindFlags(flag.CommandLine)
flag.Parse()
// Validate modelExpressURL if provided
if modelExpressURL != "" {
if _, err := url.Parse(modelExpressURL); err != nil {
setupLog.Error(err, "invalid model-express-url provided", "url", modelExpressURL)
os.Exit(1)
}
setupLog.Info("Model Express URL configured", "url", modelExpressURL)
}
ctrlConfig := commonController.Config{
RestrictedNamespace: restrictedNamespace,
Grove: commonController.GroveConfig{
......@@ -183,6 +196,7 @@ func main() {
IngressControllerTLSSecret: ingressControllerTLSSecretName,
IngressHostSuffix: ingressHostSuffix,
},
ModelExpressURL: modelExpressURL,
}
mainCtx := ctrl.SetupSignalHandler()
......
......@@ -56,6 +56,8 @@ type Config struct {
EtcdAddress string
NatsAddress string
IngressConfig IngressConfig
// ModelExpressURL is the URL of the Model Express server to inject into all pods
ModelExpressURL string
}
type IngressConfig struct {
......
......@@ -673,6 +673,13 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
Value: controllerConfig.EtcdAddress,
})
}
if controllerConfig.ModelExpressURL != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "MODEL_EXPRESS_URL",
Value: controllerConfig.ModelExpressURL,
})
}
}
// GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove
......
......@@ -1065,8 +1065,9 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
args: args{
ctx: context.Background(),
controllerConfig: controller_common.Config{
EtcdAddress: "etcd-address",
NatsAddress: "nats-address",
EtcdAddress: "etcd-address",
NatsAddress: "nats-address",
ModelExpressURL: "model-express-url",
Grove: controller_common.GroveConfig{
TerminationDelay: 15 * time.Minute,
},
......@@ -1343,6 +1344,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Name: "DYN_PARENT_DGD_K8S_NAMESPACE",
Value: "test-namespace",
},
{
Name: "MODEL_EXPRESS_URL",
Value: "model-express-url",
},
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
......@@ -1474,6 +1479,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
Name: "DYN_PARENT_DGD_K8S_NAMESPACE",
Value: "test-namespace",
},
{
Name: "MODEL_EXPRESS_URL",
Value: "model-express-url",
},
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
......
......@@ -34,7 +34,7 @@ services:
- monitoring
etcd-server:
image: bitnami/etcd:3.6.1
image: bitnamilegacy/etcd:3.6.1
environment:
- ALLOW_NONE_AUTHENTICATION=yes
ports:
......
......@@ -73,6 +73,14 @@ helm install dynamo-platform dynamo-platform-${RELEASE_VERSION}.tgz --namespace
--set "kai-scheduler.enabled=true"
```
> [!TIP]
> By default, the Model Express server is not used.
> To use an existing Model Express server, set `dynamo-operator.modelExpressURL` to that server's URL in the `helm install` command:
```bash
--set "dynamo-operator.modelExpressURL=http://model-express-server.model-express.svc.cluster.local:8080"
```
[Verify Installation](#verify-installation)
......@@ -189,3 +197,4 @@ kubectl create secret generic hf-token-secret \
- [GKE-specific setup](gke_setup.md)
- [Create custom deployments](create_deployment.md)
- [Dynamo Operator details](dynamo_operator.md)
- [Model Express Server details](https://github.com/ai-dynamo/modelexpress)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment