Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
21063c11
Unverified
Commit
21063c11
authored
Nov 06, 2024
by
Aaron Pham
Committed by
GitHub
Nov 06, 2024
Browse files
[CI/Build] drop support for Python 3.8 EOL (#8464)
Signed-off-by:
Aaron Pham
<
contact@aarnphm.xyz
>
parent
4be3a451
Changes
115
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
11 additions
and
27 deletions
+11
-27
vllm/model_executor/custom_op.py
vllm/model_executor/custom_op.py
+1
-1
vllm/model_executor/layers/resampler.py
vllm/model_executor/layers/resampler.py
+0
-1
vllm/model_executor/layers/rotary_embedding.py
vllm/model_executor/layers/rotary_embedding.py
+0
-1
vllm/model_executor/model_loader/loader.py
vllm/model_executor/model_loader/loader.py
+1
-1
vllm/model_executor/model_loader/openvino.py
vllm/model_executor/model_loader/openvino.py
+1
-1
vllm/model_executor/model_loader/tensorizer.py
vllm/model_executor/model_loader/tensorizer.py
+2
-3
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+4
-5
vllm/model_executor/models/arctic.py
vllm/model_executor/models/arctic.py
+2
-2
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+0
-1
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+0
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+0
-1
vllm/model_executor/models/commandr.py
vllm/model_executor/models/commandr.py
+0
-1
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/dbrx.py
+0
-1
vllm/model_executor/models/decilm.py
vllm/model_executor/models/decilm.py
+0
-1
vllm/model_executor/models/deepseek.py
vllm/model_executor/models/deepseek.py
+0
-1
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+0
-1
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone.py
+0
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+0
-1
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/fuyu.py
+0
-1
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma.py
+0
-1
No files found.
vllm/model_executor/custom_op.py
View file @
21063c11
...
@@ -103,7 +103,7 @@ class CustomOp(nn.Module):
...
@@ -103,7 +103,7 @@ class CustomOp(nn.Module):
# On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
# On by default if VLLM_TORCH_COMPILE_LEVEL < CompilationLevel.PIECEWISE
# Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
# Specifying 'all' or 'none' in VLLM_CUSTOM_OPS takes precedence.
@
staticmethod
@
staticmethod
@
lru_cache
()
@
lru_cache
def
default_on
()
->
bool
:
def
default_on
()
->
bool
:
count_none
=
envs
.
VLLM_CUSTOM_OPS
.
count
(
"none"
)
count_none
=
envs
.
VLLM_CUSTOM_OPS
.
count
(
"none"
)
count_all
=
envs
.
VLLM_CUSTOM_OPS
.
count
(
"all"
)
count_all
=
envs
.
VLLM_CUSTOM_OPS
.
count
(
"all"
)
...
...
vllm/model_executor/layers/resampler.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py
# https://huggingface.co/Qwen/Qwen-7B/blob/main/modeling_qwen.py
...
...
vllm/model_executor/layers/rotary_embedding.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
# https://github.com/huggingface/transformers/blob/v4.33.2/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
...
...
vllm/model_executor/model_loader/loader.py
View file @
21063c11
...
@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
...
@@ -746,7 +746,7 @@ class BitsAndBytesModelLoader(BaseModelLoader):
config_file_path
=
self
.
_get_config_file
(
qlora_adapter
)
config_file_path
=
self
.
_get_config_file
(
qlora_adapter
)
with
open
(
config_file_path
,
"r"
)
as
f
:
with
open
(
config_file_path
)
as
f
:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
self
.
target_modules
=
config
[
"target_modules"
]
self
.
target_modules
=
config
[
"target_modules"
]
...
...
vllm/model_executor/model_loader/openvino.py
View file @
21063c11
...
@@ -190,7 +190,7 @@ def get_model(
...
@@ -190,7 +190,7 @@ def get_model(
kv_cache_dtype
:
ov
.
Type
,
kv_cache_dtype
:
ov
.
Type
,
**
kwargs
,
**
kwargs
,
)
->
torch
.
nn
.
Module
:
)
->
torch
.
nn
.
Module
:
lora_config
=
kwargs
.
get
(
"lora_config"
,
None
)
lora_config
=
kwargs
.
get
(
"lora_config"
)
ov_core
=
kwargs
.
get
(
"ov_core"
)
ov_core
=
kwargs
.
get
(
"ov_core"
)
if
lora_config
:
if
lora_config
:
raise
ValueError
(
raise
ValueError
(
...
...
vllm/model_executor/model_loader/tensorizer.py
View file @
21063c11
...
@@ -280,7 +280,7 @@ class TensorizerAgent:
...
@@ -280,7 +280,7 @@ class TensorizerAgent:
self
.
tensorizer_args
=
(
self
.
tensorizer_args
=
(
self
.
tensorizer_config
.
_construct_tensorizer_args
())
self
.
tensorizer_config
.
_construct_tensorizer_args
())
self
.
extra_kwargs
=
extra_kwargs
self
.
extra_kwargs
=
extra_kwargs
if
extra_kwargs
.
get
(
"quant_config"
,
None
)
is
not
None
:
if
extra_kwargs
.
get
(
"quant_config"
)
is
not
None
:
self
.
quant_config
=
extra_kwargs
[
"quant_config"
]
self
.
quant_config
=
extra_kwargs
[
"quant_config"
]
else
:
else
:
self
.
quant_config
=
quant_config
self
.
quant_config
=
quant_config
...
@@ -380,8 +380,7 @@ def tensorizer_weights_iterator(
...
@@ -380,8 +380,7 @@ def tensorizer_weights_iterator(
stream
=
open_stream
(
tensorizer_args
.
tensorizer_uri
,
**
stream_params
)
stream
=
open_stream
(
tensorizer_args
.
tensorizer_uri
,
**
stream_params
)
with
TensorDeserializer
(
stream
,
**
deserializer_args
,
with
TensorDeserializer
(
stream
,
**
deserializer_args
,
device
=
"cpu"
)
as
state
:
device
=
"cpu"
)
as
state
:
for
name
,
param
in
state
.
items
():
yield
from
state
.
items
()
yield
name
,
param
del
state
del
state
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
21063c11
...
@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig,
...
@@ -188,7 +188,7 @@ def get_quant_config(model_config: ModelConfig,
f
"
{
quant_config_files
}
"
)
f
"
{
quant_config_files
}
"
)
quant_config_file
=
quant_config_files
[
0
]
quant_config_file
=
quant_config_files
[
0
]
with
open
(
quant_config_file
,
"r"
)
as
f
:
with
open
(
quant_config_file
)
as
f
:
config
=
json
.
load
(
f
)
config
=
json
.
load
(
f
)
if
model_config
.
quantization
==
"bitsandbytes"
:
if
model_config
.
quantization
==
"bitsandbytes"
:
...
@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str],
...
@@ -306,7 +306,7 @@ def filter_duplicate_safetensors_files(hf_weights_files: List[str],
# Iterate through the weight_map (weight_name: safetensors files)
# Iterate through the weight_map (weight_name: safetensors files)
# to identify weights that we should use.
# to identify weights that we should use.
with
open
(
index_file_name
,
"r"
)
as
f
:
with
open
(
index_file_name
)
as
f
:
weight_map
=
json
.
load
(
f
)[
"weight_map"
]
weight_map
=
json
.
load
(
f
)[
"weight_map"
]
weight_files_in_index
=
set
()
weight_files_in_index
=
set
()
for
weight_name
in
weight_map
:
for
weight_name
in
weight_map
:
...
@@ -382,7 +382,7 @@ def np_cache_weights_iterator(
...
@@ -382,7 +382,7 @@ def np_cache_weights_iterator(
with
open
(
weight_names_file
,
"w"
)
as
f
:
with
open
(
weight_names_file
,
"w"
)
as
f
:
json
.
dump
(
weight_names
,
f
)
json
.
dump
(
weight_names
,
f
)
with
open
(
weight_names_file
,
"r"
)
as
f
:
with
open
(
weight_names_file
)
as
f
:
weight_names
=
json
.
load
(
f
)
weight_names
=
json
.
load
(
f
)
for
name
in
weight_names
:
for
name
in
weight_names
:
...
@@ -423,8 +423,7 @@ def pt_weights_iterator(
...
@@ -423,8 +423,7 @@ def pt_weights_iterator(
bar_format
=
_BAR_FORMAT
,
bar_format
=
_BAR_FORMAT
,
):
):
state
=
torch
.
load
(
bin_file
,
map_location
=
"cpu"
)
state
=
torch
.
load
(
bin_file
,
map_location
=
"cpu"
)
for
name
,
param
in
state
.
items
():
yield
from
state
.
items
()
yield
name
,
param
del
state
del
state
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
empty_cache
()
...
...
vllm/model_executor/models/arctic.py
View file @
21063c11
...
@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module):
...
@@ -48,7 +48,7 @@ class ArcticMLP(nn.Module):
is_residual_mlp
:
bool
=
False
,
is_residual_mlp
:
bool
=
False
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
reduce_results
:
bool
=
True
):
reduce_results
:
bool
=
True
):
super
(
ArcticMLP
,
self
).
__init__
()
super
().
__init__
()
self
.
hidden_size
=
config
.
hidden_size
self
.
hidden_size
=
config
.
hidden_size
self
.
expert_id
=
expert_id
self
.
expert_id
=
expert_id
self
.
layer_id
=
layer_id
self
.
layer_id
=
layer_id
...
@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module):
...
@@ -89,7 +89,7 @@ class ArcticMoE(nn.Module):
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
params_dtype
:
Optional
[
torch
.
dtype
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
quant_config
:
Optional
[
QuantizationConfig
]
=
None
,
reduce_results
:
bool
=
True
):
reduce_results
:
bool
=
True
):
super
(
ArcticMoE
,
self
).
__init__
()
super
().
__init__
()
self
.
tp_size
=
tp_size
or
get_tensor_model_parallel_world_size
()
self
.
tp_size
=
tp_size
or
get_tensor_model_parallel_world_size
()
self
.
hidden_size
=
config
.
hidden_size
self
.
hidden_size
=
config
.
hidden_size
...
...
vllm/model_executor/models/baichuan.py
View file @
21063c11
# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
...
...
vllm/model_executor/models/bloom.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/bloom/modeling_bloom.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
...
...
vllm/model_executor/models/chatglm.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/THUDM/GLM-4
# https://github.com/THUDM/GLM-4
"""Inference-only ChatGLM model compatible with THUDM weights."""
"""Inference-only ChatGLM model compatible with THUDM weights."""
...
...
vllm/model_executor/models/commandr.py
View file @
21063c11
# coding=utf-8
# Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved.
# Copyright 2024 Cohere and the HuggingFace Inc. team. All rights reserved.
#
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
...
...
vllm/model_executor/models/dbrx.py
View file @
21063c11
# coding=utf-8
from
typing
import
Iterable
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Iterable
,
List
,
Optional
,
Tuple
,
Union
import
torch
import
torch
...
...
vllm/model_executor/models/decilm.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 DeciAI Research Team. All rights reserved.
# Copyright 2023 DeciAI Research Team. All rights reserved.
...
...
vllm/model_executor/models/deepseek.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
...
...
vllm/model_executor/models/deepseek_v2.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# https://github.com/huggingface/transformers/blob/v4.28.0/src/transformers/models/llama/modeling_llama.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
...
...
vllm/model_executor/models/exaone.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py
# https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/blob/main/modeling_exaone.py
# Copyright 2024 The LG U+ CTO AI Tech Lab.
# Copyright 2024 The LG U+ CTO AI Tech Lab.
...
...
vllm/model_executor/models/falcon.py
View file @
21063c11
# coding=utf-8
# Adapted from
# Adapted from
# https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py
# https://github.com/huggingface/transformers/blob/a5cc30d72ae2dc19af534e4b35c986cc28db1275/src/transformers/models/falcon/modeling_falcon.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
...
...
vllm/model_executor/models/fuyu.py
View file @
21063c11
# coding=utf-8
# adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py
# adapted from https://github.com/huggingface/transformers/blob/v4.39.3/src/transformers/models/fuyu/modeling_fuyu.py
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
# Copyright 2023 HuggingFace Inc. team. All rights reserved.
# Copyright 2023 HuggingFace Inc. team. All rights reserved.
...
...
vllm/model_executor/models/gemma.py
View file @
21063c11
# coding=utf-8
# Copyright 2023 The vLLM team.
# Copyright 2023 The vLLM team.
# Copyright (c) Google Inc.
# Copyright (c) Google Inc.
#
#
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment