Merge tag 'v0.7.2' into v0.7.2-dev

66b809cc · zhuwenwen · 37b63c24 · 0408efc6 · 66b809cc · 66b809cc
Commit 66b809cc authored Feb 08, 2025 by zhuwenwen
20 changed files
--- a/tests/compile/piecewise/test_toy_llama.py
+++ b/tests/compile/piecewise/test_toy_llama.py
+# SPDX-License-Identifier: Apache-2.0
 """
 Test the piecewise compilation with a simple model, comparing the output
 with and without the piecewise compilation.

--- a/tests/compile/test_basic_correctness.py
+++ b/tests/compile/test_basic_correctness.py
+# SPDX-License-Identifier: Apache-2.0
 import dataclasses
 from typing import Dict, List, Optional

--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
+# SPDX-License-Identifier: Apache-2.0
 import pytest
 from vllm.config import CompilationLevel

--- a/tests/compile/test_functionalization.py
+++ b/tests/compile/test_functionalization.py
+# SPDX-License-Identifier: Apache-2.0
 import os
 import pytest
 import torch

--- a/tests/compile/test_fusion.py
+++ b/tests/compile/test_fusion.py
+# SPDX-License-Identifier: Apache-2.0
 import pytest
 import torch
 from compressed_tensors.quantization import FP8_DTYPE

--- a/tests/compile/test_pass_manager.py
+++ b/tests/compile/test_pass_manager.py
+# SPDX-License-Identifier: Apache-2.0
 import pickle
 import pytest

--- a/tests/compile/test_wrapper.py
+++ b/tests/compile/test_wrapper.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import Optional
 import torch

--- a/tests/compile/utils.py
+++ b/tests/compile/utils.py
+# SPDX-License-Identifier: Apache-2.0
 import os
 import torch

--- a/tests/conftest.py
+++ b/tests/conftest.py
+# SPDX-License-Identifier: Apache-2.0
 import json
 import os
 import tempfile
@@ -738,6 +740,7 @@ class VllmRunner:
        images: Optional[PromptImageInput] = None,
        videos: Optional[PromptVideoInput] = None,
        audios: Optional[PromptAudioInput] = None,
+        **kwargs: Any,
    ) -> List[Tuple[List[List[int]], List[str]]]:
        inputs = self.get_inputs(prompts,
                                 images=images,
@@ -745,7 +748,8 @@ class VllmRunner:
                                 audios=audios)
        req_outputs = self.model.generate(inputs,
-                                          sampling_params=sampling_params)
+                                          sampling_params=sampling_params,
+                                          **kwargs)
        outputs: List[Tuple[List[List[int]], List[str]]] = []
        for req_output in req_outputs:
@@ -783,6 +787,7 @@ class VllmRunner:
        images: Optional[PromptImageInput] = None,
        audios: Optional[PromptAudioInput] = None,
        videos: Optional[PromptVideoInput] = None,
+        **kwargs: Any,
    ) -> Union[List[TokensTextLogprobs],
               List[TokensTextLogprobsPromptLogprobs]]:
        inputs = self.get_inputs(prompts,
@@ -791,7 +796,8 @@ class VllmRunner:
                                 audios=audios)
        req_outputs = self.model.generate(inputs,
-                                          sampling_params=sampling_params)
+                                          sampling_params=sampling_params,
+                                          **kwargs)
        toks_str_logsprobs_prompt_logprobs = (
            self._final_steps_generate_w_logprobs(req_outputs))
@@ -827,13 +833,15 @@ class VllmRunner:
        images: Optional[PromptImageInput] = None,
        videos: Optional[PromptVideoInput] = None,
        audios: Optional[PromptAudioInput] = None,
+        **kwargs: Any,
    ) -> List[Tuple[List[int], str]]:
        greedy_params = SamplingParams(temperature=0.0, max_tokens=max_tokens)
        outputs = self.generate(prompts,
                                greedy_params,
                                images=images,
                                videos=videos,
-                                audios=audios)
+                                audios=audios,
+                                **kwargs)
        return [(output_ids[0], output_str[0])
                for output_ids, output_str in outputs]
@@ -848,6 +856,7 @@ class VllmRunner:
        videos: Optional[PromptVideoInput] = None,
        stop_token_ids: Optional[List[int]] = None,
        stop: Optional[List[str]] = None,
+        **kwargs: Any,
    ) -> Union[List[TokensTextLogprobs],
               List[TokensTextLogprobsPromptLogprobs]]:
        greedy_logprobs_params = SamplingParams(
@@ -862,7 +871,8 @@ class VllmRunner:
                                        greedy_logprobs_params,
                                        images=images,
                                        audios=audios,
-                                        videos=videos)
+                                        videos=videos,
+                                        **kwargs)
    def generate_encoder_decoder_greedy_logprobs(
        self,

--- a/tests/core/block/conftest.py
+++ b/tests/core/block/conftest.py
+# SPDX-License-Identifier: Apache-2.0
 import pytest

--- a/tests/core/block/e2e/conftest.py
+++ b/tests/core/block/e2e/conftest.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import Callable, Iterable, Optional
 import pytest

--- a/tests/core/block/e2e/test_correctness.py
+++ b/tests/core/block/e2e/test_correctness.py
+# SPDX-License-Identifier: Apache-2.0
 from itertools import cycle
 import pytest

--- a/tests/core/block/e2e/test_correctness_sliding_window.py
+++ b/tests/core/block/e2e/test_correctness_sliding_window.py
+# SPDX-License-Identifier: Apache-2.0
 import random
 from typing import List

--- a/tests/core/block/test_block_manager.py
+++ b/tests/core/block/test_block_manager.py
+# SPDX-License-Identifier: Apache-2.0
 import pytest
 from vllm.core.block.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE,

--- a/tests/core/block/test_block_table.py
+++ b/tests/core/block/test_block_table.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List
 import pytest

--- a/tests/core/block/test_common.py
+++ b/tests/core/block/test_common.py
+# SPDX-License-Identifier: Apache-2.0
 import random
 import pytest

--- a/tests/core/block/test_cpu_gpu_block_allocator.py
+++ b/tests/core/block/test_cpu_gpu_block_allocator.py
+# SPDX-License-Identifier: Apache-2.0
 import pytest
 from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator

--- a/tests/core/block/test_naive_block.py
+++ b/tests/core/block/test_naive_block.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List, Optional
 import pytest

--- a/tests/core/block/test_prefix_caching_block.py
+++ b/tests/core/block/test_prefix_caching_block.py
+# SPDX-License-Identifier: Apache-2.0
 import math
 import random
 from typing import List, Optional
@@ -63,8 +65,8 @@ class TestPrefixCachingBlock:
        previous_block = MagicMock(spec=PrefixCachingBlock)
        prev_block_hash = random.randint(0, 1000)
-        previous_block.content_hash = (prev_block_hash
+        previous_block.content_hash = (prev_block_hash if prev_block_has_hash
-                                       if prev_block_has_hash else None)
+                                       else hash('None'))
        num_to_fill = block_size if is_curr_block_full else random.randint(
            0, block_size - 1)

--- a/tests/core/test_chunked_prefill_scheduler.py
+++ b/tests/core/test_chunked_prefill_scheduler.py
+# SPDX-License-Identifier: Apache-2.0
 from typing import List
 from unittest.mock import MagicMock