# Ideally we would map NVCreateChatCompletionRequest into Python so it can be type checked, but
# it has a lot of fields.
...
...
@@ -306,7 +100,7 @@ class VllmProcessor:
)->AsyncGenerator[dict[str,Any],None]:
"""
Run a single request through the engine. Does pre and post processing on this machine, delegates
model inference to a backend worker using the router.
"""
# ** VllmProcessor.generator called: {'messages': [{'role': 'user', 'content': 'What is the capital of Tuvalu?'}], 'model': '/home/grahamk/llms/Qwen3-0.6B', 'max_completion_tokens': 1000, 'stream': False}