[CI/Build] drop support for Python 3.8 EOL (#8464)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

[CI/Build] drop support for Python 3.8 EOL (#8464)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
21063c11 · Aaron Pham · GitHub · 4be3a451 · 21063c11 · 21063c11
Unverified commit 21063c11 — authored Nov 06, 2024 by Aaron Pham; committed Nov 06, 2024 by GitHub.
20 changed files
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -103,7 +103,7 @@ class CustomOp(nn.Module):
    # On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
    # Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
    @staticmethod
-    @lru_cache()
+    @lru_cache
    def default_on() -> bool:
        count_none = envs.VLLM_CUSTOM_OPS.count("none")
        count_all = envs.VLLM_CUSTOM_OPS.count("all")

--- a/vllm/model_executor/layers/resampler.py
+++ b/vllm/model_executor/layers/resampler.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py

--- a/vllm/model_executor/layers/rotary_embedding.py
+++ b/vllm/model_executor/layers/rotary_embedding.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):

        config_file_path = self._get_config_file(qlora_adapter)

-        with open(config_file_path, "r") as f:
+        with open(config_file_path) as f:
            config = json.load(f)
            self.target_modules = config["target_modules"]


--- a/vllm/model_executor/model_loader/openvino.py
+++ b/vllm/model_executor/model_loader/openvino.py
@@ -190,7 +190,7 @@ def get_model(
    kv_cache_dtype: ov.Type,
    **kwargs,
 ) -> torch.nn.Module:
-    lora_config = kwargs.get("lora_config", None)
+    lora_config = kwargs.get("lora_config")
    ov_core = kwargs.get("ov_core")
    if lora_config:
        raise ValueError(

--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -280,7 +280,7 @@ class TensorizerAgent:
        self.tensorizer_args = (
            self.tensorizer_config._construct_tensorizer_args())
        self.extra_kwargs = extra_kwargs
-        if extra_kwargs.get("quant_config", None) is not None:
+        if extra_kwargs.get("quant_config") is not None:
            self.quant_config = extra_kwargs["quant_config"]
        else:
            self.quant_config = quant_config
@@ -380,8 +380,7 @@ def tensorizer_weights_iterator(
    stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
    with TensorDeserializer(stream, **deserializer_args,
                            device="cpu") as state:
-        for name, param in state.items():
-            yield name, param
+        yield from state.items()
    del state



--- a/vllm/model_executor/model_loader/weight_utils.py
+++ b/vllm/model_executor/model_loader/weight_utils.py
@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig,
            f"{quant_config_files}")

    quant_config_file = quant_config_files[0]
-    with open(quant_config_file, "r") as f:
+    with open(quant_config_file) as f:
        config = json.load(f)

        if model_config.quantization == "bitsandbytes":
@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str],

    # Iterate through the weight_map (weight_name: safetensors files)
    # to identify weights that we should use.
-    with open(index_file_name, "r") as f:
+    with open(index_file_name) as f:
        weight_map = json.load(f)["weight_map"]
    weight_files_in_index = set()
    for weight_name in weight_map:
@@ -382,7 +382,7 @@ def np_cache_weights_iterator(
            with open(weight_names_file, "w") as f:
                json.dump(weight_names, f)

-    with open(weight_names_file, "r") as f:
+    with open(weight_names_file) as f:
        weight_names = json.load(f)

    for name in weight_names:
@@ -423,8 +423,7 @@ def pt_weights_iterator(
            bar_format=_BAR_FORMAT,
    ):
        state = torch.load(bin_file, map_location="cpu")
-        for name, param in state.items():
-            yield name, param
+        yield from state.items()
        del state
        torch.cuda.empty_cache()


--- a/vllm/model_executor/models/arctic.py
+++ b/vllm/model_executor/models/arctic.py
@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module):
                 is_residual_mlp: bool = False,
                 quant_config: Optional[QuantizationConfig] = None,
                 reduce_results: bool = True):
-        super(ArcticMLP, self).__init__()
+        super().__init__()
        self.hidden_size = config.hidden_size
        self.expert_id = expert_id
        self.layer_id = layer_id
@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module):
                 params_dtype: Optional[torch.dtype] = None,
                 quant_config: Optional[QuantizationConfig] = None,
                 reduce_results: bool = True):
-        super(ArcticMoE, self).__init__()
+        super().__init__()

        self.tp_size = tp_size or get_tensor_model_parallel_world_size()
        self.hidden_size = config.hidden_size

--- a/vllm/model_executor/models/baichuan.py
+++ b/vllm/model_executor/models/baichuan.py
-# coding=utf-8
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

--- a/vllm/model_executor/models/bloom.py
+++ b/vllm/model_executor/models/bloom.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/chatglm.py
+++ b/vllm/model_executor/models/chatglm.py
-# coding=utf-8
 # Adapted from
 # https://github.com/THUDM/GLM-4
 """Inference-only ChatGLM model compatible with THUDM weights."""

--- a/vllm/model_executor/models/commandr.py
+++ b/vllm/model_executor/models/commandr.py
-# coding=utf-8
 # Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved.
 #
 # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX

--- a/vllm/model_executor/models/dbrx.py
+++ b/vllm/model_executor/models/dbrx.py
-# coding=utf-8
 from typing import Iterable, List, Optional, Tuple, Union

 import torch

--- a/vllm/model_executor/models/decilm.py
+++ b/vllm/model_executor/models/decilm.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 DeciAI Research Team. All rights reserved.

--- a/vllm/model_executor/models/deepseek.py
+++ b/vllm/model_executor/models/deepseek.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/exaone.py
+++ b/vllm/model_executor/models/exaone.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py
 # Copyright 2024 The LG U+ CTO AI Tech Lab.

--- a/vllm/model_executor/models/falcon.py
+++ b/vllm/model_executor/models/falcon.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
-# coding=utf-8
 # adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py
 # Copyright 2023 The vLLM team.
 # Copyright 2023 HuggingFace Inc. team. All rights reserved.

--- a/vllm/model_executor/models/gemma.py
+++ b/vllm/model_executor/models/gemma.py
-# coding=utf-8
 # Copyright 2023 The vLLM team.
 # Copyright (c) Google Inc.
 #