help="Determines how events are published [nats|zmq]",
help="Determines how events are published [nats|zmq]",
)
)
parser.add_argument(
parser.add_argument(
"--exp-python-factory",
"--chat-processor",
action="store_true",
dest="chat_processor",
default=False,
type=str,
help="[EXPERIMENTAL] Enable Python-based engine factory. When set, engines will be created via a Python callback instead of the default Rust pipeline.",
choices=["dynamo","vllm"],
default="dynamo",
help="[EXPERIMENTAL] When set to 'vllm', use local vllm for the pre and post processor.",
# Ideally we would map NVCreateChatCompletionRequest into Python so it can be type checked, but
# it has a lot of fields.
# request: dynamo.NVCreateChatCompletionRequest
asyncdefgenerator(
self,request:dict[str,Any]
)->AsyncGenerator[dict[str,Any],None]:
"""
Run a single request through the engine. Does pre and post processing on this machine, delegates
model inference to a worker using the router.
"""
# Example `request` payload as received by VllmProcessor.generator: {'messages': [{'role': 'user', 'content': 'What is the capital of Tuvalu?'}], 'model': '/home/grahamk/llms/Qwen3-0.6B', 'max_completion_tokens': 1000, 'stream': False}