[CI/Build] Update Ruff version (#8469)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>

[CI/Build] Update Ruff version (#8469)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
9d104b5b · Aaron Pham · GitHub · 6ffa3f31 · 9d104b5b · 9d104b5b
Unverified commit 9d104b5b — authored Sep 18, 2024 by Aaron Pham; committed by GitHub on Sep 18, 2024
20 changed files
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -25,10 +25,10 @@ jobs:
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
+        pip install -r requirements-lint.txt
    - name: Analysing the code with ruff
      run: |
-        ruff .
+        ruff check .
    - name: Spelling check with codespell
      run: |
        codespell --toml pyproject.toml

--- a/benchmarks/kernels/graph_machete_bench.py
+++ b/benchmarks/kernels/graph_machete_bench.py
@@ -45,8 +45,7 @@ if __name__ == "__main__":
    rows = int(math.ceil(len(results) / 2))
    fig, axs = plt.subplots(rows, 2, figsize=(12, 5 * rows))
    axs = axs.flatten()
-    axs_idx = 0
-    for shape, data in results.items():
+    for axs_idx, (shape, data) in enumerate(results.items()):
        plt.sca(axs[axs_idx])
        df = pd.DataFrame(data)
        sns.lineplot(data=df,
@@ -59,6 +58,5 @@ if __name__ == "__main__":
                     palette="Dark2")
        plt.title(f"Shape: {shape}")
        plt.ylabel("time (median, s)")
-        axs_idx += 1
    plt.tight_layout()
    plt.savefig("graph_machete_bench.pdf")
--- a/format.sh
+++ b/format.sh
@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done'

 # Lint specified files
 lint() {
-    ruff "$@"
+    ruff check "$@"
 }

 # Lint files that differ from main branch. Ignores dirs that are not slated
@@ -175,7 +175,7 @@ lint_changed() {

    if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
        git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-             ruff
+             ruff check
    fi

 }

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,8 @@ ignore = [
    "E731",
    # Loop control variable not used within loop body
    "B007",
+    # f-string format
+    "UP032",
 ]

 [tool.mypy]

--- a/requirements-lint.txt
+++ b/requirements-lint.txt
@@ -2,7 +2,7 @@
 yapf==0.32.0
 toml==0.10.2
 tomli==2.0.1
-ruff==0.1.5
+ruff==0.6.5
 codespell==2.3.0
 isort==5.13.2
 clang-format==18.1.5

--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -158,10 +158,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
    to initialize torch.
    """

-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return not request.node.get_closest_marker("skip_global_cleanup")


 @pytest.fixture(autouse=True)

--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -65,10 +65,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
    to initialize torch.
    """

-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return not request.node.get_closest_marker("skip_global_cleanup")


 @pytest.fixture(autouse=True)

--- a/tests/multimodal/test_base.py
+++ b/tests/multimodal/test_base.py
@@ -5,7 +5,7 @@ from vllm.multimodal.base import MultiModalInputs, NestedTensors

 def assert_nested_tensors_equal(expected: NestedTensors,
                                actual: NestedTensors):
-    assert type(expected) == type(actual)
+    assert type(expected) == type(actual)  # noqa: E721
    if isinstance(expected, torch.Tensor):
        assert torch.equal(expected, actual)
    else:

--- a/tests/test_cache_block_hashing.py
+++ b/tests/test_cache_block_hashing.py
@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,

            hashes.append([])
            prompts = [prefix + prompt for prompt in sample_prompts]
-            seq_id = 0
-            for prompt in prompts:
+            for seq_id, prompt in enumerate(prompts):
                hashes[-1].append([])
                prompt_token_ids = tokenizer.encode(prompt)
                seq = Sequence(seq_id,
@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
                for idx in range(num_blocks):
                    hashes[-1][-1].append(seq.hash_of_block(idx))

-                seq_id += 1
-
    # Check that hashes made with two prefixes with different first blocks are
    # different everywhere.
    for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):

--- a/tests/test_logger.py
+++ b/tests/test_logger.py
@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
    configuration occurs."""
    with pytest.raises(RuntimeError) as ex_info:
        _configure_vllm_root_logger()
-    assert ex_info.type == RuntimeError
+    assert ex_info.type == RuntimeError  # noqa: E721
    assert "File does not exist" in str(ex_info)


@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
                   logging_config_file.name):
            with pytest.raises(ValueError) as ex_info:
                _configure_vllm_root_logger()
-            assert ex_info.type == ValueError
+            assert ex_info.type == ValueError  # noqa: E721
            assert "Invalid logging config. Expected Dict, got" in str(ex_info)



--- a/tests/worker/test_encoder_decoder_model_runner.py
+++ b/tests/worker/test_encoder_decoder_model_runner.py
@@ -453,8 +453,7 @@ def test_prepare_decode(batch_size):
    # each sequence) in the decode phase

    expected_selected_token_indices = []
-    selected_token_start_idx = 0
-    for seq_len in seq_lens:
+    for selected_token_start_idx, seq_len in enumerate(seq_lens):
        # Compute the index offset of the final token in each
        # sequence's decoded outputs; since a single token is
        # decoded per iteration per sequence, then the length
@@ -463,7 +462,6 @@ def test_prepare_decode(batch_size):
        # generated tokens is 0 (i.e. the expected sampling index
        # for a given sequence is just `selected_token_start_idx`)
        expected_selected_token_indices.append(selected_token_start_idx)
-        selected_token_start_idx += 1

    sampling_metadata = model_input.sampling_metadata
    actual = sampling_metadata.selected_token_indices

--- a/tests/worker/test_model_runner.py
+++ b/tests/worker/test_model_runner.py
@@ -241,10 +241,8 @@ def test_prepare_decode_cuda_graph(batch_size):

    # Verify Sampling
    expected_selected_token_indices = []
-    selected_token_start_idx = 0
-    for _ in context_lens:
+    for selected_token_start_idx, _ in enumerate(context_lens):
        expected_selected_token_indices.append(selected_token_start_idx)
-        selected_token_start_idx += 1
    sampling_metadata = SamplingMetadata.prepare(
        seq_group_metadata_list,
        seq_lens,

--- a/vllm/adapter_commons/utils.py
+++ b/vllm/adapter_commons/utils.py
@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]:

 def get_adapter(adapter_id: int,
                registered_adapters: Dict[int, Any]) -> Optional[Any]:
-    return registered_adapters.get(adapter_id, None)
+    return registered_adapters.get(adapter_id)


 ## worker functions

--- a/vllm/attention/backends/utils.py
+++ b/vllm/attention/backends/utils.py
@@ -33,10 +33,8 @@ def is_block_tables_empty(block_tables: Union[None, Dict]):
    """
    if block_tables is None:
        return True
-    if isinstance(block_tables, dict) and all(
-            value is None for value in block_tables.values()):
-        return True
-    return False
+    return (isinstance(block_tables, dict)
+            and all(value is None for value in block_tables.values()))


 def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int,

--- a/vllm/core/block/prefix_caching_block.py
+++ b/vllm/core/block/prefix_caching_block.py
@@ -417,9 +417,7 @@ class PrefixCachingBlockAllocator(BlockAllocator):

    def is_block_cached(self, block: Block) -> bool:
        assert block.content_hash is not None
-        if block.content_hash in self._cached_blocks:
-            return True
-        return False
+        return block.content_hash in self._cached_blocks

    def promote_to_immutable_block(self, block: Block) -> BlockId:
        """Once a mutable block is full, it can be promoted to an immutable

--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -399,9 +399,7 @@ class BlockSpaceManagerV2(BlockSpaceManager):
        """
        alloc_status = self._can_swap(seq_group, Device.CPU,
                                      SequenceStatus.RUNNING)
-        if alloc_status == AllocStatus.OK:
-            return True
-        return False
+        return alloc_status == AllocStatus.OK

    def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
        """Returns the block id mapping (from GPU to CPU) generated by

--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -826,7 +826,7 @@ class AsyncLLMEngine:
            request_id: The unique id of the request.
            lora_request: LoRA request to use for generation, if any.
            trace_headers: OpenTelemetry trace headers.
-            prompt_adapter_request: Prompt Adapter request to use 
+            prompt_adapter_request: Prompt Adapter request to use
                                            for generation, if any.

        Yields:
@@ -1042,7 +1042,7 @@ class AsyncLLMEngine:
    async def start_profile(self) -> None:
        # using type instead of isinstance to check to avoid capturing
        # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
            self.engine.model_executor.start_profile()
        else:
            self.engine.model_executor._run_workers("start_profile")
@@ -1050,7 +1050,7 @@ class AsyncLLMEngine:
    async def stop_profile(self) -> None:
        # using type instead of isinstance to check to avoid capturing
        # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
            self.engine.model_executor.stop_profile()
        else:
            self.engine.model_executor._run_workers("stop_profile")
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -144,7 +144,7 @@ class LLMEngine:
            decoding.
        executor_class: The model executor class for managing distributed
            execution.
-        prompt_adapter_config (Optional): The configuration related to serving 
+        prompt_adapter_config (Optional): The configuration related to serving
            prompt adapters.
        log_stats: Whether to log statistics.
        usage_context: Specified entry point, used for usage info collection.
@@ -1605,7 +1605,7 @@ class LLMEngine:
    def start_profile(self) -> None:
        # using type instead of isinstance to check to avoid capturing
        # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) == GPUExecutor:  # noqa: E721
            self.model_executor.start_profile()
        else:
            self.model_executor._run_workers("start_profile")
@@ -1613,7 +1613,7 @@ class LLMEngine:
    def stop_profile(self) -> None:
        # using type instead of isinstance to check to avoid capturing
        # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) == GPUExecutor:  # noqa: E721
            self.model_executor.stop_profile()
        else:
            self.model_executor._run_workers("stop_profile")

--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
@@ -67,9 +67,9 @@ class BaseLogitsProcessor:
        instruction = self._guide.get_next_instruction(
            state=self._fsm_state[seq_id])

-        if type(instruction) == Generate:
+        if type(instruction) == Generate:  # noqa: E721
            allowed_tokens = instruction.tokens
-        elif type(instruction) == Write:
+        elif type(instruction) == Write:  # noqa: E721
            # TODO: support fast forward tokens
            allowed_tokens = [instruction.tokens[0]]
        else:

--- a/vllm/model_executor/layers/quantization/awq_marlin.py
+++ b/vllm/model_executor/layers/quantization/awq_marlin.py
@@ -110,9 +110,9 @@ class AWQMarlinConfig(QuantizationConfig):
    def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]):
        # Extract data from quant config.
        quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits", None)
-        group_size = quant_config.get("group_size", None)
-        has_zp = quant_config.get("zero_point", None)
+        num_bits = quant_config.get("bits")
+        group_size = quant_config.get("group_size")
+        has_zp = quant_config.get("zero_point")

        if quant_method != "awq":
            return False