[CI/Build] drop support for Python 3.8 EOL (#8464)

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>

[CI/Build] drop support for Python 3.8 EOL (#8464)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
21063c11 · Aaron Pham · GitHub · 4be3a451 · 21063c11 · 21063c11
Unverified commit 21063c11, authored by Aaron Pham on Nov 06, 2024; committed by GitHub on Nov 06, 2024.
20 changed files
--- a/vllm/model_executor/models/mllama.py
+++ b/vllm/model_executor/models/mllama.py
-# coding=utf-8
 # Copyright 2024 the HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

--- a/vllm/model_executor/models/mlp_speculator.py
+++ b/vllm/model_executor/models/mlp_speculator.py
@@ -37,7 +37,7 @@ class MLPSpeculatorLayerNorm(nn.Module):
        eps=1e-06,
        elementwise_scale_and_shift=True,
    ):
-        super(MLPSpeculatorLayerNorm, self).__init__()
+        super().__init__()
        self.elementwise_scale_and_shift = elementwise_scale_and_shift
        if self.elementwise_scale_and_shift:
            self.weight = nn.Parameter(torch.empty(normalized_shape))

--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -1121,9 +1121,9 @@ class MolmoForCausalLM(nn.Module, SupportsMultiModal, SupportsPP):
            batch_size * num_image * num_patch, -1).contiguous()
        image_input_idx = image_input_idx * valid.to(image_input_idx.dtype)
-        offset = torch.cat(
+        offset = torch.cat([seq_len.new_zeros(1),
-            [seq_len.new_zeros(
+                            seq_len.cumsum(dim=0)[:-1]],
-                (1)), seq_len.cumsum(dim=0)[:-1]], dim=0)[:, None]
+                           dim=0)[:, None]
        image_input_idx = image_input_idx + offset.to(image_input_idx.dtype)
        image_input_idx = image_input_idx.flatten()[:, None]
        mat = image_input_idx == torch.arange(

--- a/vllm/model_executor/models/mpt.py
+++ b/vllm/model_executor/models/mpt.py
-# coding=utf-8
 # Adapted from https://huggingface.co/mosaicml/mpt-7b/tree/main
 import math
 from typing import Iterable, List, Optional, Tuple, Union

--- a/vllm/model_executor/models/nemotron.py
+++ b/vllm/model_executor/models/nemotron.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/olmo.py
+++ b/vllm/model_executor/models/olmo.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.40.1/src/transformers/models/olmo/modeling_olmo.py
 # Copyright 2024 The vLLM team.

--- a/vllm/model_executor/models/opt.py
+++ b/vllm/model_executor/models/opt.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/opt/modeling_opt.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/orion.py
+++ b/vllm/model_executor/models/orion.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/modeling_orion.py
 # Copyright (c) OrionStar Inc.

--- a/vllm/model_executor/models/persimmon.py
+++ b/vllm/model_executor/models/persimmon.py
-# coding=utf-8
 # adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/persimmon/modeling_persimmon.py
 # Copyright 2023 The vLLM team.
 # Copyright 2023 EleutherAI and the HuggingFace Inc. team. All rights reserved.

--- a/vllm/model_executor/models/phi.py
+++ b/vllm/model_executor/models/phi.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/microsoft/phi-1_5/blob/main/modeling_phi.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/phi3.py
+++ b/vllm/model_executor/models/phi3.py
-# coding=utf-8
 # Adapted from llama.py
 """Inference-only Phi3 model code inherit from Llama.py"""

--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
-# coding=utf-8
 # Copyright 2024 The vLLM team.
 # Copyright 2024 Microsoft and the HuggingFace Inc. team. All rights reserved.
 #

--- a/vllm/model_executor/models/phimoe.py
+++ b/vllm/model_executor/models/phimoe.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
 # Copyright 2023 The vLLM team.

--- a/vllm/model_executor/models/pixtral.py
+++ b/vllm/model_executor/models/pixtral.py
@@ -136,11 +136,11 @@ def input_processor_for_pixtral(ctx: InputContext, inputs: DecoderOnlyInputs):
        if image_token_id not in inputs['prompt_token_ids']:
            raise ValueError(
-                (f"You've passed {inputs=} without {image_token_id=}"
+                f"You've passed {inputs=} without {image_token_id=}"
                " Make sure to process your input via mistral_common's"
                " tokenizer or pass a chat completion request. For more"
                " For more info, see: "
-                 "https://github.com/vllm-project/vllm/issues/8411."))
+                "https://github.com/vllm-project/vllm/issues/8411.")
    return inputs

--- a/vllm/model_executor/models/qwen.py
+++ b/vllm/model_executor/models/qwen.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py
 # Copyright (c) Alibaba Cloud.

--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/qwen2/modeling_qwen2.py
 # Copyright 2024 The Qwen team.
@@ -417,9 +416,9 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
                and hasattr(config, "max_window_layers")):
            raise ValueError("Sliding window for some but all layers is not "
                             "supported. This model uses sliding window "
-                             "but `max_window_layers` = %s is less than "
+                             "but `max_window_layers` = {} is less than "
-                             "`num_hidden_layers` = %s. Please open an issue "
+                             "`num_hidden_layers` = {}. Please open an issue "
-                             "to discuss this feature." % (
+                             "to discuss this feature.".format(
                                 config.max_window_layers,
                                 config.num_hidden_layers,
                             ))

--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
-# coding=utf-8
 # Copyright 2024 The Qwen team.
 # Copyright 2023 The vLLM team.
 # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.

--- a/vllm/model_executor/models/qwen2_cls.py
+++ b/vllm/model_executor/models/qwen2_cls.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B/blob/main/modeling_qwen2_rm.py
 # Copyright 2024 Kakao Corp. (Kanana-X Team)
@@ -60,9 +59,9 @@ class Qwen2ForSequenceClassification(nn.Module):
                and hasattr(config, "max_window_layers")):
            raise ValueError("Sliding window for some but all layers is not "
                             "supported. This model uses sliding window "
-                             "but `max_window_layers` = %s is less than "
+                             "but `max_window_layers` = {} is less than "
-                             "`num_hidden_layers` = %s. Please open an issue "
+                             "`num_hidden_layers` = {}. Please open an issue "
-                             "to discuss this feature." % (
+                             "to discuss this feature.".format(
                                 config.max_window_layers,
                                 config.num_hidden_layers,
                             ))

--- a/vllm/model_executor/models/qwen2_moe.py
+++ b/vllm/model_executor/models/qwen2_moe.py
-# coding=utf-8
 # Adapted from
 # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
 # Copyright 2024 The Qwen team.

--- a/vllm/model_executor/models/qwen2_rm.py
+++ b/vllm/model_executor/models/qwen2_rm.py
-# coding=utf-8
 # Adapted from
 # https://huggingface.co/Qwen/Qwen2.5-Math-RM-72B/blob/main/modeling_qwen2_rm.py
 # Copyright 2024 The Qwen team.
@@ -71,9 +70,9 @@ class Qwen2ForRewardModel(nn.Module, SupportsPP):
                and hasattr(config, "max_window_layers")):
            raise ValueError("Sliding window for some but all layers is not "
                             "supported. This model uses sliding window "
-                             "but `max_window_layers` = %s is less than "
+                             "but `max_window_layers` = {} is less than "
-                             "`num_hidden_layers` = %s. Please open an issue "
+                             "`num_hidden_layers` = {}. Please open an issue "
-                             "to discuss this feature." % (
+                             "to discuss this feature.".format(
                                 config.max_window_layers,
                                 config.num_hidden_layers,
                             ))