Unverified commit 21063c11, authored by Aaron Pham and committed by GitHub
Browse files

[CI/Build] drop support for Python 3.8 EOL (#8464)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
parent 4be3a451
...@@ -103,7 +103,7 @@ class CustomOp(nn.Module): ...@@ -103,7 +103,7 @@ class CustomOp(nn.Module):
# On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE # On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
# Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence. # Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
@staticmethod @staticmethod
@lru_cache() @lru_cache
def default_on() -> bool: def default_on() -> bool:
count_none = envs.VLLM_CUSTOM_OPS.count("none") count_none = envs.VLLM_CUSTOM_OPS.count("none")
count_all = envs.VLLM_CUSTOM_OPS.count("all") count_all = envs.VLLM_CUSTOM_OPS.count("all")
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py # https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py # https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
......
...@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader): ...@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
config_file_path = self._get_config_file(qlora_adapter) config_file_path = self._get_config_file(qlora_adapter)
with open(config_file_path, "r") as f: with open(config_file_path) as f:
config = json.load(f) config = json.load(f)
self.target_modules = config["target_modules"] self.target_modules = config["target_modules"]
......
...@@ -190,7 +190,7 @@ def get_model( ...@@ -190,7 +190,7 @@ def get_model(
kv_cache_dtype: ov.Type, kv_cache_dtype: ov.Type,
**kwargs, **kwargs,
) -> torch.nn.Module: ) -> torch.nn.Module:
lora_config = kwargs.get("lora_config", None) lora_config = kwargs.get("lora_config")
ov_core = kwargs.get("ov_core") ov_core = kwargs.get("ov_core")
if lora_config: if lora_config:
raise ValueError( raise ValueError(
......
...@@ -280,7 +280,7 @@ class TensorizerAgent: ...@@ -280,7 +280,7 @@ class TensorizerAgent:
self.tensorizer_args = ( self.tensorizer_args = (
self.tensorizer_config._construct_tensorizer_args()) self.tensorizer_config._construct_tensorizer_args())
self.extra_kwargs = extra_kwargs self.extra_kwargs = extra_kwargs
if extra_kwargs.get("quant_config", None) is not None: if extra_kwargs.get("quant_config") is not None:
self.quant_config = extra_kwargs["quant_config"] self.quant_config = extra_kwargs["quant_config"]
else: else:
self.quant_config = quant_config self.quant_config = quant_config
...@@ -380,8 +380,7 @@ def tensorizer_weights_iterator( ...@@ -380,8 +380,7 @@ def tensorizer_weights_iterator(
stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params) stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
with TensorDeserializer(stream, **deserializer_args, with TensorDeserializer(stream, **deserializer_args,
device="cpu") as state: device="cpu") as state:
for name, param in state.items(): yield from state.items()
yield name, param
del state del state
......
...@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig, ...@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig,
f"{quant_config_files}") f"{quant_config_files}")
quant_config_file = quant_config_files[0] quant_config_file = quant_config_files[0]
with open(quant_config_file, "r") as f: with open(quant_config_file) as f:
config = json.load(f) config = json.load(f)
if model_config.quantization == "bitsandbytes": if model_config.quantization == "bitsandbytes":
...@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str], ...@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str],
# Iterate through the weight_map (weight_name: safetensors files) # Iterate through the weight_map (weight_name: safetensors files)
# to identify weights that we should use. # to identify weights that we should use.
with open(index_file_name, "r") as f: with open(index_file_name) as f:
weight_map = json.load(f)["weight_map"] weight_map = json.load(f)["weight_map"]
weight_files_in_index = set() weight_files_in_index = set()
for weight_name in weight_map: for weight_name in weight_map:
...@@ -382,7 +382,7 @@ def np_cache_weights_iterator( ...@@ -382,7 +382,7 @@ def np_cache_weights_iterator(
with open(weight_names_file, "w") as f: with open(weight_names_file, "w") as f:
json.dump(weight_names, f) json.dump(weight_names, f)
with open(weight_names_file, "r") as f: with open(weight_names_file) as f:
weight_names = json.load(f) weight_names = json.load(f)
for name in weight_names: for name in weight_names:
...@@ -423,8 +423,7 @@ def pt_weights_iterator( ...@@ -423,8 +423,7 @@ def pt_weights_iterator(
bar_format=_BAR_FORMAT, bar_format=_BAR_FORMAT,
): ):
state = torch.load(bin_file, map_location="cpu") state = torch.load(bin_file, map_location="cpu")
for name, param in state.items(): yield from state.items()
yield name, param
del state del state
torch.cuda.empty_cache() torch.cuda.empty_cache()
......
...@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module): ...@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module):
is_residual_mlp: bool = False, is_residual_mlp: bool = False,
quant_config: Optional[QuantizationConfig] = None, quant_config: Optional[QuantizationConfig] = None,
reduce_results: bool = True): reduce_results: bool = True):
super(ArcticMLP, self).__init__() super().__init__()
self.hidden_size = config.hidden_size self.hidden_size = config.hidden_size
self.expert_id = expert_id self.expert_id = expert_id
self.layer_id = layer_id self.layer_id = layer_id
...@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module): ...@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module):
params_dtype: Optional[torch.dtype] = None, params_dtype: Optional[torch.dtype] = None,
quant_config: Optional[QuantizationConfig] = None, quant_config: Optional[QuantizationConfig] = None,
reduce_results: bool = True): reduce_results: bool = True):
super(ArcticMoE, self).__init__() super().__init__()
self.tp_size = tp_size or get_tensor_model_parallel_world_size() self.tp_size = tp_size or get_tensor_model_parallel_world_size()
self.hidden_size = config.hidden_size self.hidden_size = config.hidden_size
......
# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
# #
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/THUDM/GLM-4 # https://github.com/THUDM/GLM-4
"""Inference-only ChatGLM model compatible with THUDM weights.""" """Inference-only ChatGLM model compatible with THUDM weights."""
......
# coding=utf-8
# Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved. # Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved.
# #
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
......
# coding=utf-8
from typing import Iterable, List, Optional, Tuple, Union from typing import Iterable, List, Optional, Tuple, Union
import torch import torch
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 DeciAI Research Team. All rights reserved. # Copyright 2023 DeciAI Research Team. All rights reserved.
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py # https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from # Adapted from
# https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py # https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py
# Copyright 2024 The LG U+ CTO AI Tech Lab. # Copyright 2024 The LG U+ CTO AI Tech Lab.
......
# coding=utf-8
# Adapted from # Adapted from
# https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py # https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
......
# coding=utf-8
# adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py # adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
# Copyright 2023 HuggingFace Inc. team. All rights reserved. # Copyright 2023 HuggingFace Inc. team. All rights reserved.
......
# coding=utf-8
# Copyright 2023 The vLLM team. # Copyright 2023 The vLLM team.
# Copyright (c) Google Inc. # Copyright (c) Google Inc.
# #
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment