fix(bindings): Default router config in bindings (#1716)

* Added a default temperature value for text generation requests when no temperature is specified.
* Improved handling of missing configuration values to prevent errors during model initialization.

fix(bindings): Default router config in bindings (#1716)
* Added a default temperature value for text generation requests when no temperature is specified.
* Improved handling of missing configuration values to prevent errors during model initialization.
edf00c5c · Graham King · GitHub · 6365a015 · edf00c5c · edf00c5c
Unverified Commit edf00c5c authored Jul 01, 2025 by Graham King Committed by GitHub Jul 01, 2025
3 changed files
--- a/lib/bindings/python/examples/hello_world/server_sglang.py
+++ b/lib/bindings/python/examples/hello_world/server_sglang.py
@@ -32,6 +32,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
 DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate"
 DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
+DEFAULT_TEMPERATURE = 0.7
 class Config:
@@ -54,7 +55,8 @@ class RequestHandler:
    async def generate(self, request):
        # print(f"Received request: {request}")
        sampling_params = {
-            "temperature": request["sampling_options"]["temperature"],
+            "temperature": request["sampling_options"]["temperature"]
+            or DEFAULT_TEMPERATURE,
            # sglang defaults this to 128
            "max_new_tokens": request["stop_conditions"]["max_tokens"],
        }

--- a/lib/bindings/python/examples/hello_world/server_vllm.py
+++ b/lib/bindings/python/examples/hello_world/server_vllm.py
@@ -44,6 +44,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
 DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate"
 DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
+DEFAULT_TEMPERATURE = 0.7
 class Config:
@@ -69,7 +70,8 @@ class RequestHandler:
        # print(f"Received request: {request}")
        prompt = TokensPrompt(prompt_token_ids=request["token_ids"])
        sampling_params = SamplingParams(
-            temperature=request["sampling_options"]["temperature"],
+            temperature=request["sampling_options"]["temperature"]
+            or DEFAULT_TEMPERATURE,
            # vllm defaults this to 16
            max_tokens=request["stop_conditions"]["max_tokens"],
        )

--- a/lib/llm/src/local_model.rs
+++ b/lib/llm/src/local_model.rs
@@ -144,11 +144,7 @@ impl LocalModelBuilder {
                endpoint_id,
                template,
                http_port: self.http_port,
-                // We always have one. The Option is so we can take it.
+                router_config: self.router_config.take().unwrap_or_default(),
-                router_config: self
-                    .router_config
-                    .take()
-                    .expect("unreachable, RouterConfig missing"),
            });
        }
@@ -203,10 +199,7 @@ impl LocalModelBuilder {
            endpoint_id,
            template,
            http_port: self.http_port,
-            router_config: self
+            router_config: self.router_config.take().unwrap_or_default(),
-                .router_config
-                .take()
-                .expect("unreachable, RouterConfig missing"),
        })
    }
 }