Unverified commit edf00c5c, authored by Graham King, committed by GitHub
Browse files

fix(bindings): Default router config in bindings (#1716)

  * Added a default temperature value for text generation requests when no temperature is specified.
  * Improved handling of missing configuration values to prevent errors during model initialization.
parent 6365a015
...@@ -32,6 +32,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker ...@@ -32,6 +32,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_TEMPERATURE = 0.7
class Config: class Config:
...@@ -54,7 +55,8 @@ class RequestHandler: ...@@ -54,7 +55,8 @@ class RequestHandler:
async def generate(self, request): async def generate(self, request):
# print(f"Received request: {request}") # print(f"Received request: {request}")
sampling_params = { sampling_params = {
"temperature": request["sampling_options"]["temperature"], "temperature": request["sampling_options"]["temperature"]
or DEFAULT_TEMPERATURE,
# sglang defaults this to 128 # sglang defaults this to 128
"max_new_tokens": request["stop_conditions"]["max_tokens"], "max_new_tokens": request["stop_conditions"]["max_tokens"],
} }
......
...@@ -44,6 +44,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker ...@@ -44,6 +44,7 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate" DEFAULT_ENDPOINT = "dyn://dynamo.backend.generate"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B" DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
DEFAULT_TEMPERATURE = 0.7
class Config: class Config:
...@@ -69,7 +70,8 @@ class RequestHandler: ...@@ -69,7 +70,8 @@ class RequestHandler:
# print(f"Received request: {request}") # print(f"Received request: {request}")
prompt = TokensPrompt(prompt_token_ids=request["token_ids"]) prompt = TokensPrompt(prompt_token_ids=request["token_ids"])
sampling_params = SamplingParams( sampling_params = SamplingParams(
temperature=request["sampling_options"]["temperature"], temperature=request["sampling_options"]["temperature"]
or DEFAULT_TEMPERATURE,
# vllm defaults this to 16 # vllm defaults this to 16
max_tokens=request["stop_conditions"]["max_tokens"], max_tokens=request["stop_conditions"]["max_tokens"],
) )
......
...@@ -144,11 +144,7 @@ impl LocalModelBuilder { ...@@ -144,11 +144,7 @@ impl LocalModelBuilder {
endpoint_id, endpoint_id,
template, template,
http_port: self.http_port, http_port: self.http_port,
// We always have one. The Option is so we can take it. router_config: self.router_config.take().unwrap_or_default(),
router_config: self
.router_config
.take()
.expect("unreachable, RouterConfig missing"),
}); });
} }
...@@ -203,10 +199,7 @@ impl LocalModelBuilder { ...@@ -203,10 +199,7 @@ impl LocalModelBuilder {
endpoint_id, endpoint_id,
template, template,
http_port: self.http_port, http_port: self.http_port,
router_config: self router_config: self.router_config.take().unwrap_or_default(),
.router_config
.take()
.expect("unreachable, RouterConfig missing"),
}) })
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment