Unverified commit cc40af70, authored by Alec and committed by GitHub
Browse files

fix: dynamo-run pass proper args using register-llm (#1230)

parent e450c2c7
...@@ -157,8 +157,8 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -157,8 +157,8 @@ async def init(runtime: DistributedRuntime, config: Config):
# KV routing relies on logging KV metrics # KV routing relies on logging KV metrics
"disable_log_stats": False, "disable_log_stats": False,
} }
if config.kv_block_size: assert config.kv_block_size > 0, "Must use non-negative integer for KV Block Size"
arg_map["block_size"] = config.kv_block_size arg_map["block_size"] = config.kv_block_size
if config.context_length: if config.context_length:
# Usually we want it to default to the max (from tokenizer_config.json) # Usually we want it to default to the max (from tokenizer_config.json)
...@@ -201,7 +201,14 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -201,7 +201,14 @@ async def init(runtime: DistributedRuntime, config: Config):
engine_client = await engine_context.__aenter__() engine_client = await engine_context.__aenter__()
await register_llm( await register_llm(
ModelType.Backend, endpoint, config.model_path, config.model_name ModelType.Backend,
endpoint,
config.model_path,
config.model_name,
context_length=arg_map.get(
"max_model_len", None
), # if None, takes length from tokenizer
kv_cache_block_size=arg_map["block_size"],
) )
handler = RequestHandler(component, engine_client, default_sampling_params) handler = RequestHandler(component, engine_client, default_sampling_params)
handler.setup_kv_metrics() handler.setup_kv_metrics()
......
...@@ -603,7 +603,7 @@ class ModelType: ...@@ -603,7 +603,7 @@ class ModelType:
"""What type of request this model needs: Chat, Component or Backend (pre-processed)""" """What type of request this model needs: Chat, Component or Backend (pre-processed)"""
... ...
async def register_llm(model_type: ModelType, endpoint: Endpoint, model_path: str, model_name: Optional[str]) -> None: async def register_llm(model_type: ModelType, endpoint: Endpoint, model_path: str, model_name: Optional[str] = None, context_length: Optional[int] = None, kv_cache_block_size: Optional[int] = None) -> None:
"""Attach the model at path to the given endpoint, and advertise it as model_type""" """Attach the model at path to the given endpoint, and advertise it as model_type"""
... ...
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment