[Core] Asynchronous Output Processor (#7049)

Co-authored-by: Alexander Matveev <alexm@neuralmagic.com>

[Core] Asynchronous Output Processor (#7049)
Co-authored-by: Alexander Matveev <alexm@neuralmagic.com>
2eedede8 · Megha Agarwal · GitHub · 015e6cc2 · 2eedede8
Unverified commit 2eedede8, authored Aug 26, 2024 by Megha Agarwal; committed by GitHub on Aug 26, 2024
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 7 additions and 1 deletion

vllm/worker/worker_base.py vllm/worker/worker_base.py +7 -1

No files found.
--- a/vllm/worker/worker_base.py
+++ b/vllm/worker/worker_base.py
@@ -263,6 +263,12 @@ class LocalOrDistributedWorkerBase(WorkerBase):
            broadcast_data.update(kwargs)
            broadcast_tensor_dict(broadcast_data, src=0)

+        if execute_model_req.output_proc_callback_fn:
+            model_input = dataclasses.replace(  # type: ignore
+                model_input,
+                output_proc_callback_fn=execute_model_req.
+                output_proc_callback_fn)
+
        return model_input, worker_input, kwargs

    def prepare_input(
@@ -289,7 +295,7 @@ class LocalOrDistributedWorkerBase(WorkerBase):

    def execute_model(
        self,
-        execute_model_req: Optional[ExecuteModelRequest] = None
+        execute_model_req: Optional[ExecuteModelRequest] = None,
    ) -> Optional[List[SamplerOutput]]:
        """Executes at least one model step on the given sequences, unless no
        sequences are provided."""