Unverified commit 8291172f, authored by Chi and committed by GitHub
Browse files

fix: Move register_llm_block down (#2316)

parent 12fe3551
......@@ -145,16 +145,6 @@ async def init(runtime: DistributedRuntime, config: Config):
.client()
)
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
await register_llm(
ModelType.Backend,
generate_endpoint,
config.model,
config.served_model_name,
kv_cache_block_size=config.engine_args.block_size,
migration_limit=config.migration_limit,
)
factory = StatLoggerFactory(component, config.engine_args.data_parallel_rank or 0)
engine_client, vllm_config, default_sampling_params = setup_vllm_engine(
config, factory
......@@ -190,6 +180,16 @@ async def init(runtime: DistributedRuntime, config: Config):
handler.kv_publisher = kv_publisher
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
await register_llm(
ModelType.Backend,
generate_endpoint,
config.model,
config.served_model_name,
kv_cache_block_size=config.engine_args.block_size,
migration_limit=config.migration_limit,
)
try:
await asyncio.gather(
# for decode, we want to transfer the in-flight requests to other decode engines,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment