[CI/Build] drop support for Python 3.8 EOL (#8464)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

[CI/Build] drop support for Python 3.8 EOL (#8464)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
21063c11 · Aaron Pham · GitHub · 4be3a451 · 21063c11 · 21063c11
Unverified commit 21063c11, authored Nov 06, 2024 by Aaron Pham; committed by GitHub on Nov 06, 2024.
15 changed files
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/19e6e80e10118f855137b90740936c0b11ac397f/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py
 # Copyright 2024 The Qwen team.
@@ -246,9 +245,8 @@ class Qwen2VisionAttention(nn.Module):
        q, k, v = dist_utils.split_tensor_along_last_dim(x, 3)
        batch_size = q.shape[1]
-        q, k, v = [
+        q, k, v = (rearrange(x, "s b ... -> b s ...").contiguous()
-            rearrange(x, "s b ... -> b s ...").contiguous() for x in (q, k, v)
+                   for x in (q, k, v))
-        ]
        if rotary_pos_emb is not None:
            q = apply_rotary_pos_emb_vision(q, rotary_pos_emb)
            k = apply_rotary_pos_emb_vision(k, rotary_pos_emb)
@@ -258,7 +256,7 @@ class Qwen2VisionAttention(nn.Module):
            #   flash_attn_varlen_func)
            from flash_attn import flash_attn_varlen_func
-            q, k, v = [rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v]]
+            q, k, v = (rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v])
            max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item()
            output = flash_attn_varlen_func(q,
@@ -276,7 +274,7 @@ class Qwen2VisionAttention(nn.Module):
                                      b=batch_size)
        elif self.attn_backend == _Backend.TORCH_SDPA:
            seq_length = q.size(1)
-            q, k, v = [rearrange(x, "b s h d -> b h s d") for x in [q, k, v]]
+            q, k, v = (rearrange(x, "b s h d -> b h s d") for x in [q, k, v])
            attention_mask = torch.zeros([1, seq_length, seq_length],
                                         device=q.device,
                                         dtype=torch.bool)

--- a/vllm/model_executor/models/solar.py
+++ b/vllm/model_executor/models/solar.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/stablelm.py
+++ b/vllm/model_executor/models/stablelm.py
-# coding=utf-8
 # Copyright 2023 Stability AI, EleutherAI, and The HuggingFace Inc. team.
 # All rights reserved.
 #

--- a/vllm/model_executor/models/starcoder2.py
+++ b/vllm/model_executor/models/starcoder2.py
-# coding=utf-8
 # Copyright 2024 BigCode and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

--- a/vllm/model_executor/models/xverse.py
+++ b/vllm/model_executor/models/xverse.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/xverse/XVERSE-7B/blob/main/modeling_xverse.py
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.

--- a/vllm/multimodal/base.py
+++ b/vllm/multimodal/base.py
-import sys
 from abc import ABC, abstractmethod
 from collections import UserDict, defaultdict
 from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Mapping,
@@ -34,14 +33,9 @@ A dictionary containing nested tensors which have been batched via
 :meth:`MultiModalInputs.batch`.
 """
-if sys.version_info < (3, 9):
-    # UserDict cannot be subscripted
-    class _MultiModalInputsBase(UserDict):
-        pass
-else:
-    class _MultiModalInputsBase(UserDict[str, NestedTensors]):
+class _MultiModalInputsBase(UserDict[str, NestedTensors]):
-        pass
+    pass
 class MultiModalInputs(_MultiModalInputsBase):
@@ -262,18 +256,23 @@ class MultiModalPlugin(ABC):
                logger.warning(
                    "Model class %s already has an input mapper "
                    "registered to %s. It is overwritten by the new one.",
-                    model_cls, self)
+                    model_cls,
+                    self,
+                )
-            self._input_mappers[model_cls] = mapper \
+            self._input_mappers[model_cls] = (mapper
-                or self._default_input_mapper
+                                              or self._default_input_mapper)
            return model_cls
        return wrapper
-    def map_input(self, model_config: "ModelConfig",
+    def map_input(
-                  data: MultiModalData[object],
+        self,
-                  mm_processor_kwargs: Dict[str, Any]) -> MultiModalInputs:
+        model_config: "ModelConfig",
+        data: MultiModalData[object],
+        mm_processor_kwargs: Dict[str, Any],
+    ) -> MultiModalInputs:
        """
        Transform the data into a dictionary of model inputs using the
        input mapper registered for that model.
@@ -348,13 +347,15 @@ class MultiModalPlugin(ABC):
                logger.warning(
                    "Model class %s already calculates maximum number of "
                    "tokens in %s. It is overwritten by the new one.",
-                    model_cls, self)
+                    model_cls,
+                    self,
+                )
            if isinstance(max_mm_tokens, int):
                self._validate_max_multimodal_tokens(max_mm_tokens)
-            self._max_mm_tokens[model_cls] = max_mm_tokens \
+            self._max_mm_tokens[model_cls] = (
-                or self._default_max_multimodal_tokens
+                max_mm_tokens or self._default_max_multimodal_tokens)
            return model_cls
@@ -482,8 +483,10 @@ class MultiModalPlaceholderMap:
        placeholder_maps: Dict[str, MultiModalPlaceholderMap] = defaultdict(
            MultiModalPlaceholderMap)
-        for modality, placeholders in seq_group.multi_modal_placeholders.items(
+        for (
-        ):
+                modality,
+                placeholders,
+        ) in seq_group.multi_modal_placeholders.items():
            mm_items = mm_data.pop(modality)
            if not isinstance(mm_items, list):
                mm_items = [mm_items]
@@ -499,8 +502,11 @@ class MultiModalPlaceholderMap:
        return mm_data, placeholder_maps
    def append_items_from_seq_group(
-            self, positions: range, multi_modal_items: List[_T],
+        self,
-            multi_modal_placeholders: List[PlaceholderRange]) -> List[_T]:
+        positions: range,
+        multi_modal_items: List[_T],
+        multi_modal_placeholders: List[PlaceholderRange],
+    ) -> List[_T]:
        """
        Adds the multi-modal items that intersect ``positions`` to this
        placeholder map and returns the intersecting items.
@@ -515,20 +521,26 @@ class MultiModalPlaceholderMap:
                                             multi_modal_items):
            placeholder = range(
                placeholder_dict["offset"],
-                placeholder_dict["offset"] + placeholder_dict["length"])
+                placeholder_dict["offset"] + placeholder_dict["length"],
-            intersection = range(max(positions.start, placeholder.start),
+            )
-                                 min(positions.stop, placeholder.stop))
+            intersection = range(
+                max(positions.start, placeholder.start),
+                min(positions.stop, placeholder.stop),
+            )
            if not intersection:
                # Skip this multi-modal item.
                continue
-            token_embedding_range = range(intersection.start - positions.start,
+            token_embedding_range = range(
-                                          intersection.stop - positions.start)
+                intersection.start - positions.start,
+                intersection.stop - positions.start,
+            )
            multimodal_embedding_range = range(
                intersection.start - placeholder.start + self.src_len,
-                intersection.stop - placeholder.start + self.src_len)
+                intersection.stop - placeholder.start + self.src_len,
+            )
            intersecting_items.append(mm_item)
            self.dest_ranges.append(token_embedding_range)

--- a/vllm/prompt_adapter/utils.py
+++ b/vllm/prompt_adapter/utils.py
@@ -37,9 +37,8 @@ def load_peft_weights(model_id: str,
            Additional arguments to pass to the `hf_hub_download` method when 
            loading from the HuggingFace Hub.
    """
-    path = (os.path.join(model_id, hf_hub_download_kwargs["subfolder"])
+    path = (os.path.join(model_id, hf_hub_download_kwargs["subfolder"]) if
-            if hf_hub_download_kwargs.get("subfolder", None) is not None else
+            hf_hub_download_kwargs.get("subfolder") is not None else model_id)
-            model_id)
    if device is None:
        device = infer_device()
@@ -51,19 +50,19 @@ def load_peft_weights(model_id: str,
        filename = os.path.join(path, WEIGHTS_NAME)
        use_safetensors = False
    else:
-        token = hf_hub_download_kwargs.get("token", None)
+        token = hf_hub_download_kwargs.get("token")
        if token is None:
-            token = hf_hub_download_kwargs.get("use_auth_token", None)
+            token = hf_hub_download_kwargs.get("use_auth_token")
        hub_filename = (os.path.join(hf_hub_download_kwargs["subfolder"],
                                     SAFETENSORS_WEIGHTS_NAME)
-                        if hf_hub_download_kwargs.get("subfolder", None)
+                        if hf_hub_download_kwargs.get("subfolder") is not None
-                        is not None else SAFETENSORS_WEIGHTS_NAME)
+                        else SAFETENSORS_WEIGHTS_NAME)
        has_remote_safetensors_file = file_exists(
            repo_id=model_id,
            filename=hub_filename,
-            revision=hf_hub_download_kwargs.get("revision", None),
+            revision=hf_hub_download_kwargs.get("revision"),
-            repo_type=hf_hub_download_kwargs.get("repo_type", None),
+            repo_type=hf_hub_download_kwargs.get("repo_type"),
            token=token,
        )
        use_safetensors = has_remote_safetensors_file

--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -308,7 +308,7 @@ def load_params_config(model, revision) -> PretrainedConfig:
        config_path = Path(
            hf_hub_download(model, config_file_name, revision=revision))
-    with open(config_path, "r") as file:
+    with open(config_path) as file:
        config_dict = json.load(file)
    config_mapping = {

--- a/vllm/transformers_utils/configs/chatglm.py
+++ b/vllm/transformers_utils/configs/chatglm.py
-# coding=utf-8
 # Adapted from
 # https://github.com/THUDM/ChatGLM2-6B
 from transformers import PretrainedConfig

--- a/vllm/transformers_utils/configs/exaone.py
+++ b/vllm/transformers_utils/configs/exaone.py
-# coding=utf-8
 # Copied from
 # https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/configuration_exaone.py
 # Copyright 2021 The LG AI Research EXAONE Lab. All rights reserved.

--- a/vllm/transformers_utils/configs/jais.py
+++ b/vllm/transformers_utils/configs/jais.py
-# coding=utf-8
 # Copyright 2023 The OpenAI Team Authors and HuggingFace Inc. team.
 # Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
 # Copyright 2023 Cerebras Systems.

--- a/vllm/transformers_utils/configs/mpt.py
+++ b/vllm/transformers_utils/configs/mpt.py
-# coding=utf-8
 # Copied from
 # https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py
 """A HuggingFace-style model configuration."""
@@ -117,10 +116,10 @@ class MPTConfig(PretrainedConfig):
                                                     init_config_defaults)
        if self.d_model % self.n_heads != 0:
            raise ValueError('d_model must be divisible by n_heads')
-        if any((
+        if any(
                prob < 0 or prob > 1 for prob in
-            [self.attn_config['attn_pdrop'], self.resid_pdrop, self.emb_pdrop]
+            [self.attn_config['attn_pdrop'], self.resid_pdrop, self.emb_pdrop
-        )):
+             ]):
            raise ValueError(
                "self.attn_config['attn_pdrop'], resid_pdrop, emb_pdrop are "
                "probabilities and must be between 0 and 1")

--- a/vllm/transformers_utils/configs/nemotron.py
+++ b/vllm/transformers_utils/configs/nemotron.py
-# coding=utf-8
 # Copyright 2024 HuggingFace Inc. team. All rights reserved.
 # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 #
@@ -144,7 +143,7 @@ class NemotronConfig(PretrainedConfig):
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
-        head_dim = head_dim or kwargs.get("kv_channels", None)
+        head_dim = head_dim or kwargs.get("kv_channels")
        self.head_dim = head_dim if head_dim is not None else (
            hidden_size // num_attention_heads)
@@ -160,8 +159,8 @@ class NemotronConfig(PretrainedConfig):
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # for backward compatibility
-        partial_rotary_factor = kwargs.get("rope_percent", None) or kwargs.get(
+        partial_rotary_factor = kwargs.get("rope_percent") or kwargs.get(
-            "rope_percentage", None) or partial_rotary_factor
+            "rope_percentage") or partial_rotary_factor
        self.partial_rotary_factor = partial_rotary_factor
        self._rope_scaling_validation()
        self.attention_bias = attention_bias

--- a/vllm/transformers_utils/configs/solar.py
+++ b/vllm/transformers_utils/configs/solar.py
-# coding=utf-8
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1153,7 +1153,7 @@ class SortedHelpFormatter(argparse.HelpFormatter):
    def add_arguments(self, actions):
        actions = sorted(actions, key=lambda x: x.option_strings)
-        super(SortedHelpFormatter, self).add_arguments(actions)
+        super().add_arguments(actions)
 class FlexibleArgumentParser(argparse.ArgumentParser):
@@ -1279,7 +1279,7 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
        config: Dict[str, Union[int, str]] = {}
        try:
-            with open(file_path, 'r') as config_file:
+            with open(file_path) as config_file:
                config = yaml.safe_load(config_file)
        except Exception as ex:
            logger.error(