Unverified commit 21063c11, authored by Aaron Pham, committed by GitHub
Browse files

[CI/Build] drop support for Python 3.8 EOL (#8464)


Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
parent 4be3a451
......@@ -103,7 +103,7 @@ class CustomOp(nn.Module):
# On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
# Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
@staticmethod
@lru_cache()
@lru_cache
def default_on() -> bool:
count_none = envs.VLLM_CUSTOM_OPS.count("none")
count_all = envs.VLLM_CUSTOM_OPS.count("all")
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
......
......@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
config_file_path = self._get_config_file(qlora_adapter)
with open(config_file_path, "r") as f:
with open(config_file_path) as f:
config = json.load(f)
self.target_modules = config["target_modules"]
......
......@@ -190,7 +190,7 @@ def get_model(
kv_cache_dtype: ov.Type,
**kwargs,
) -> torch.nn.Module:
lora_config = kwargs.get("lora_config", None)
lora_config = kwargs.get("lora_config")
ov_core = kwargs.get("ov_core")
if lora_config:
raise ValueError(
......
......@@ -280,7 +280,7 @@ class TensorizerAgent:
self.tensorizer_args = (
self.tensorizer_config._construct_tensorizer_args())
self.extra_kwargs = extra_kwargs
if extra_kwargs.get("quant_config", None) is not None:
if extra_kwargs.get("quant_config") is not None:
self.quant_config = extra_kwargs["quant_config"]
else:
self.quant_config = quant_config
......@@ -380,8 +380,7 @@ def tensorizer_weights_iterator(
stream = open_stream(tensorizer_args.tensorizer_uri, **stream_params)
with TensorDeserializer(stream, **deserializer_args,
device="cpu") as state:
for name, param in state.items():
yield name, param
yield from state.items()
del state
......
......@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig,
f"{quant_config_files}")
quant_config_file = quant_config_files[0]
with open(quant_config_file, "r") as f:
with open(quant_config_file) as f:
config = json.load(f)
if model_config.quantization == "bitsandbytes":
......@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str],
# Iterate through the weight_map (weight_name: safetensors files)
# to identify weights that we should use.
with open(index_file_name, "r") as f:
with open(index_file_name) as f:
weight_map = json.load(f)["weight_map"]
weight_files_in_index = set()
for weight_name in weight_map:
......@@ -382,7 +382,7 @@ def np_cache_weights_iterator(
with open(weight_names_file, "w") as f:
json.dump(weight_names, f)
with open(weight_names_file, "r") as f:
with open(weight_names_file) as f:
weight_names = json.load(f)
for name in weight_names:
......@@ -423,8 +423,7 @@ def pt_weights_iterator(
bar_format=_BAR_FORMAT,
):
state = torch.load(bin_file, map_location="cpu")
for name, param in state.items():
yield name, param
yield from state.items()
del state
torch.cuda.empty_cache()
......
......@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module):
is_residual_mlp: bool = False,
quant_config: Optional[QuantizationConfig] = None,
reduce_results: bool = True):
super(ArcticMLP, self).__init__()
super().__init__()
self.hidden_size = config.hidden_size
self.expert_id = expert_id
self.layer_id = layer_id
......@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module):
params_dtype: Optional[torch.dtype] = None,
quant_config: Optional[QuantizationConfig] = None,
reduce_results: bool = True):
super(ArcticMoE, self).__init__()
super().__init__()
self.tp_size = tp_size or get_tensor_model_parallel_world_size()
self.hidden_size = config.hidden_size
......
# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py
# Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from
# https://github.com/THUDM/GLM-4
"""Inference-only ChatGLM model compatible with THUDM weights."""
......
# coding=utf-8
# Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
......
# coding=utf-8
from typing import Iterable, List, Optional, Tuple, Union
import torch
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 DeciAI Research Team. All rights reserved.
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
......
# coding=utf-8
# Adapted from
# https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py
# Copyright 2024 The LG U+ CTO AI Tech Lab.
......
# coding=utf-8
# Adapted from
# https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py
# Copyright 2023 The vLLM team.
......
# coding=utf-8
# adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py
# Copyright 2023 The vLLM team.
# Copyright 2023 HuggingFace Inc. team. All rights reserved.
......
# coding=utf-8
# Copyright 2023 The vLLM team.
# Copyright (c) Google Inc.
#
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment