[Bugfix] Fix type annotations in CPU model runner (#4256)

e73ed0f1 · Woosuk Kwon · GitHub · 296cdf8a · e73ed0f1
Unverified commit e73ed0f1, authored Apr 22, 2024 by Woosuk Kwon and committed by GitHub on Apr 22, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 3 deletions

vllm/worker/cpu_model_runner.py vllm/worker/cpu_model_runner.py +4 -3

No files found.
--- a/vllm/worker/cpu_model_runner.py
+++ b/vllm/worker/cpu_model_runner.py
@@ -73,7 +73,8 @@ class CPUModelRunner:
    def _prepare_prompt(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
-    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int]]:
+    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, List[int],
+               Optional[torch.Tensor]]:
        assert len(seq_group_metadata_list) > 0
        input_tokens: List[int] = []
        input_positions: List[int] = []
@@ -347,8 +348,8 @@ class CPUModelRunner:
    def prepare_input_tensors(
        self,
        seq_group_metadata_list: List[SequenceGroupMetadata],
-    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata,
+    ) -> Tuple[torch.Tensor, torch.Tensor, AttentionMetadata, SamplingMetadata,
-               SamplingMetadata]:
+               Optional[torch.Tensor]]:
        multi_modal_input = None
        if self.is_driver_worker:
            # NOTE: We assume that all sequences in the group are all prompts or