Unverified Commit 1a8dcacd authored by atchernych's avatar atchernych Committed by GitHub
Browse files

fix: enable K8s discovery in GAIE (#5303)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent c94d097a
...@@ -544,8 +544,21 @@ impl DistributedConfig { ...@@ -544,8 +544,21 @@ impl DistributedConfig {
// If a NATS server is configured via env, enable the client regardless of request plane. // If a NATS server is configured via env, enable the client regardless of request plane.
let nats_enabled = request_plane.is_nats() let nats_enabled = request_plane.is_nats()
|| std::env::var(crate::config::environment_names::nats::NATS_SERVER).is_ok(); || std::env::var(crate::config::environment_names::nats::NATS_SERVER).is_ok();
// Check discovery backend to determine the appropriate KV store backend -
// kubernetes discovery, or etcd.
let discovery_backend =
std::env::var("DYN_DISCOVERY_BACKEND").unwrap_or_else(|_| "kv_store".to_string());
let store_backend = if discovery_backend == "kubernetes" {
tracing::info!("Using Kubernetes discovery backend");
kv::Selector::Memory
} else {
kv::Selector::Etcd(Box::default())
};
DistributedConfig { DistributedConfig {
store_backend: kv::Selector::Etcd(Box::default()), store_backend,
nats_config: if nats_enabled { nats_config: if nats_enabled {
Some(nats::ClientOptions::default()) Some(nats::ClientOptions::default())
} else { } else {
......
...@@ -189,6 +189,11 @@ First, deploy the Dynamo Graph per instructions above. ...@@ -189,6 +189,11 @@ First, deploy the Dynamo Graph per instructions above.
Then follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE. Then follow [Deploy Inference Gateway Section 2](../deploy/inference-gateway/README.md#2-deploy-inference-gateway) to install GAIE.
Update the containers.epp.image in the deployment file, i.e. llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml. It should match the release tag and be in the format `nvcr.io/nvidia/ai-dynamo/frontend:<my-tag>` i.e. `nvcr.io/nvstaging/ai-dynamo/dynamo-frontend:0.7.0rc2-amd64` Update the containers.epp.image in the deployment file, i.e. llama-3-70b/vllm/agg/gaie/k8s-manifests/epp/deployment.yaml. It should match the release tag and be in the format `nvcr.io/nvidia/ai-dynamo/frontend:<my-tag>` i.e. `nvcr.io/nvstaging/ai-dynamo/dynamo-frontend:0.7.0rc2-amd64`
The recipe assumes you are using the Kubernetes discovery backend and sets the `DYN_DISCOVERY_BACKEND` env variable in the epp deployment. If you want to use etcd instead, enable the lines below and remove the `DYN_DISCOVERY_BACKEND` env var.
```yaml
- name: ETCD_ENDPOINTS
value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace
```
```bash ```bash
export DEPLOY_PATH=llama-3-70b/vllm/agg/ export DEPLOY_PATH=llama-3-70b/vllm/agg/
......
...@@ -69,8 +69,9 @@ spec: ...@@ -69,8 +69,9 @@ spec:
fieldPath: metadata.namespace fieldPath: metadata.namespace
- name: PLATFORM_NAMESPACE - name: PLATFORM_NAMESPACE
value: "$(POD_NAMESPACE)" # set to your dynamo platform namespace if different value: "$(POD_NAMESPACE)" # set to your dynamo platform namespace if different
- name: ETCD_ENDPOINTS # if you want to use etcd enable this and remove the DYN_DISCOVERY_BACKEND env var
value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace # - name: ETCD_ENDPOINTS
# value: "dynamo-platform-etcd.$(PLATFORM_NAMESPACE):2379" # update dynamo-platform to appropriate namespace
- name: NATS_SERVER - name: NATS_SERVER
value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" # update dynamo-platform to appropriate namespace value: "nats://dynamo-platform-nats.$(PLATFORM_NAMESPACE):4222" # update dynamo-platform to appropriate namespace
- name: DYNAMO_NAMESPACE - name: DYNAMO_NAMESPACE
...@@ -83,6 +84,8 @@ spec: ...@@ -83,6 +84,8 @@ spec:
value: "true" value: "true"
- name: DYNAMO_ENFORCE_DISAGG - name: DYNAMO_ENFORCE_DISAGG
value: "false" value: "false"
- name: DYN_DISCOVERY_BACKEND
value: "kubernetes"
ports: ports:
- containerPort: 9002 - containerPort: 9002
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment