Unverified commit f17fcb15, authored by atchernych, committed by GitHub
Browse files

fix: Update flag for GAIE agg deployment [DEP-659] (#5027)


Signed-off-by: Anna Tchernych <atchernych@nvidia.com>
parent 1247fe31
...@@ -365,7 +365,7 @@ index 0000000..b689c00 ...@@ -365,7 +365,7 @@ index 0000000..b689c00
+ - pluginRef: picker + - pluginRef: picker
diff --git a/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go b/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go diff --git a/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go b/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go
new file mode 100644 new file mode 100644
index 0000000..75f30e9 index 0000000..6ee6634
--- /dev/null --- /dev/null
+++ b/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go +++ b/pkg/epp/scheduling/plugins/dynamo_kv_scorer/plugin.go
@@ -0,0 +1,446 @@ @@ -0,0 +1,446 @@
...@@ -541,7 +541,7 @@ index 0000000..75f30e9 ...@@ -541,7 +541,7 @@ index 0000000..75f30e9
+ ffiComponent = getEnvOrDefault("DYNAMO_COMPONENT", "backend") + ffiComponent = getEnvOrDefault("DYNAMO_COMPONENT", "backend")
+ ffiModel = getEnvOrDefault("DYNAMO_MODEL", "Qwen/Qwen3-0.6B") + ffiModel = getEnvOrDefault("DYNAMO_MODEL", "Qwen/Qwen3-0.6B")
+ ffiWorkerID = getEnvInt64OrDefault("DYNAMO_WORKER_ID", 1) + ffiWorkerID = getEnvInt64OrDefault("DYNAMO_WORKER_ID", 1)
+ ffiEnforceDisagg = getEnvBoolOrDefault("DYNAMO_ENFORCE_DISAGG", true) // TODO default to false + ffiEnforceDisagg = getEnvBoolOrDefault("DYNAMO_ENFORCE_DISAGG", false)
+ +
+ ffiOverlapScoreWeight = getEnvFloatOrDefault("DYNAMO_OVERLAP_SCORE_WEIGHT", -1.0) + ffiOverlapScoreWeight = getEnvFloatOrDefault("DYNAMO_OVERLAP_SCORE_WEIGHT", -1.0)
+ ffiRouterTemperature = getEnvFloatOrDefault("DYNAMO_ROUTER_TEMPERATURE", -1.0) + ffiRouterTemperature = getEnvFloatOrDefault("DYNAMO_ROUTER_TEMPERATURE", -1.0)
......
...@@ -81,6 +81,8 @@ spec: ...@@ -81,6 +81,8 @@ spec:
value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command value: "128" # UPDATE to match the --block-size in your deploy.yaml engine command
- name: USE_STREAMING - name: USE_STREAMING
value: "true" value: "true"
- name: DYNAMO_ENFORCE_DISAGG
value: "false"
ports: ports:
- containerPort: 9002 - containerPort: 9002
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment