Commit 5ffc0d13 (unverified)
Migrate linter from `pylint` to `ruff` (#1665)
Authored Nov 20, 2023 by Simon Mo; committed by GitHub on Nov 20, 2023
Parent: 112627e8

Changes: 45 files in the commit; this page shows 5 changed files with 15 additions and 20 deletions (+15, -20).
Files shown on this page:

  vllm/model_executor/weight_utils.py      +4  -6
  vllm/transformers_utils/config.py        +1  -1
  vllm/transformers_utils/configs/mpt.py   +8  -8
  vllm/utils.py                            +1  -1
  vllm/worker/worker.py                    +1  -4
vllm/model_executor/weight_utils.py

@@ -131,11 +131,9 @@ def prepare_hf_model_weights(
 ) -> Tuple[str, List[str], bool]:
     # Download model weights from huggingface.
     is_local = os.path.isdir(model_name_or_path)
-    if use_safetensors:
-        allow_patterns = ["*.safetensors"]
-    else:
-        # Some quantized models use .pt files for storing the weights.
-        allow_patterns = ["*.bin", "*.pt"]
+    # Some quantized models use .pt files for storing the weights.
+    allow_patterns = ["*.safetensors"
+                      ] if use_safetensors else ["*.bin", "*.pt"]
     if not is_local:
         # Use file lock to prevent multiple processes from
         # downloading the same model weights at the same time.
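The five-line if/else is folded into a single assignment with a conditional expression (the unusual wrapping of the list literal is the auto-formatter's). For context, a minimal sketch of how a pattern list like this is typically consumed downstream, assuming huggingface_hub's `snapshot_download` and its `allow_patterns` parameter; the helper below is illustrative, not vLLM's actual code:

```python
from huggingface_hub import snapshot_download


def download_weights(model_name_or_path: str, use_safetensors: bool) -> str:
    # Same selection as the diff above: prefer safetensors, otherwise fall
    # back to .bin/.pt files (some quantized models ship .pt weights).
    allow_patterns = (["*.safetensors"]
                      if use_safetensors else ["*.bin", "*.pt"])
    # snapshot_download fetches only the files matching allow_patterns and
    # returns the local directory of the downloaded snapshot.
    return snapshot_download(repo_id=model_name_or_path,
                             allow_patterns=allow_patterns)
```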
@@ -242,7 +240,7 @@ def hf_model_weights_iterator(
     elif use_safetensors:
         for st_file in hf_weights_files:
             with safe_open(st_file, framework="pt") as f:
-                for name in f.keys():
+                for name in f:
                     param = f.get_tensor(name)
                     yield name, param
     else:
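The only change in this hunk is the loop header: `for name in f.keys():` becomes `for name in f:`, the kind of simplification ruff's flake8-simplify rules (SIM118) flag on mappings. A minimal sketch of that rule on a plain dict; illustration only, since whether the shortcut applies to the safetensors handle depends on that object supporting iteration:

```python
# Illustrative dict standing in for a mapping of tensor names to tensors.
weights = {"layer.weight": [1.0, 2.0], "layer.bias": [0.5]}

# Flagged by SIM118: calling .keys() just to iterate.
for name in weights.keys():
    print(name, weights[name])

# Preferred form: iterating a mapping yields its keys directly.
for name in weights:
    print(name, weights[name])
```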
vllm/transformers_utils/config.py

@@ -2,7 +2,7 @@ from typing import Optional

 from transformers import AutoConfig, PretrainedConfig

-from vllm.transformers_utils.configs import *  # pylint: disable=wildcard-import
+from vllm.transformers_utils.configs import *

 _CONFIG_REGISTRY = {
     "aquila": AquilaConfig,
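Only the trailing `# pylint: disable=wildcard-import` comment is dropped; under ruff the corresponding findings are pyflakes' F403 (star import used) and F405 (name may come from a star import), which would typically be suppressed in project-level configuration rather than per line. The star import exists so that `_CONFIG_REGISTRY` can name the per-model config classes directly; a self-contained sketch of that pattern, using stand-in classes rather than vLLM's real ones:

```python
# Self-contained sketch of the pattern config.py relies on: a configs package
# re-exports per-model config classes, and a registry refers to them by name
# after a star import. The classes below are stand-ins, not vLLM's.

class AquilaConfig:   # in vLLM this is defined under transformers_utils/configs
    model_type = "aquila"

class MPTConfig:      # stand-in for the class defined in mpt.py below
    model_type = "mpt"

# Declaring __all__ in the package __init__ keeps `import *` explicit.
__all__ = ["AquilaConfig", "MPTConfig"]

# What config.py does after `from vllm.transformers_utils.configs import *`:
_CONFIG_REGISTRY = {
    "aquila": AquilaConfig,
    "mpt": MPTConfig,
}
```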
vllm/transformers_utils/configs/mpt.py

@@ -62,7 +62,6 @@ class MPTConfig(PretrainedConfig):
                  fc_type: str = 'torch',
                  verbose: Optional[int] = None,
                  **kwargs: Any):
-        # pylint: disable=line-too-long
         """The MPT configuration class.

         Args:
             d_model (int): The size of the embedding dimension of the model.
@@ -139,10 +138,10 @@ class MPTConfig(PretrainedConfig):
         self.init_config = init_config
         self.fc_type = fc_type
         if verbose is not None:
             warnings.warn(
                 DeprecationWarning(
-                    'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.'
-                ))
+                    'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.'),
+                stacklevel=2)
         if 'name' in kwargs:
             del kwargs['name']
         if 'loss_fn' in kwargs:
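This hunk, and the two below it, add `stacklevel=2` to `warnings.warn`, the fix that checks like flake8-bugbear's B028 (no explicit stacklevel) ask for under ruff. With `stacklevel=2` the warning is attributed to the caller of the configuration code rather than to the `warn()` call site, which is usually the line a user can actually change. A minimal sketch with hypothetical function names:

```python
import warnings


def configure(verbose=None):
    if verbose is not None:
        # stacklevel=2 makes the warning point at configure()'s caller.
        warnings.warn(
            DeprecationWarning(
                "verbose is ignored; use python_log_level instead."),
            stacklevel=2)


def user_code():
    configure(verbose=1)  # the warning is reported against this line


user_code()
```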
@@ -150,8 +149,8 @@ class MPTConfig(PretrainedConfig):
         if self.attn_config.get('alibi', False):
             self.learned_pos_emb = False
             warnings.warn(
-                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`'
-            )
+                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`',
+                stacklevel=2)
         super().__init__(**kwargs)
         self._validate_config()
@@ -211,7 +210,8 @@ class MPTConfig(PretrainedConfig):
             )
         if not self.learned_pos_emb and (not self.attn_config['alibi']):
             warnings.warn(
-                'Positional information not being provided to the model.')
+                'Positional information not being provided to the model.',
+                stacklevel=2)
         if self.fc_type == 'te' or self.ffn_config['ffn_type'] == 'te_ln_mlp':
             try:
                 # pylint: disable=import-outside-toplevel
vllm/utils.py

@@ -30,7 +30,7 @@ class Counter:
 def get_max_shared_memory_bytes(gpu: int = 0) -> int:
     """Returns the maximum shared memory per thread block in bytes."""
     # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
-    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97  # pylint: disable=invalid-name
+    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
     max_shared_mem = cuda_utils.get_device_attribute(
         cudaDevAttrMaxSharedMemoryPerBlockOptin, gpu)
     return int(max_shared_mem)
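Only the `# pylint: disable=invalid-name` comment is dropped; ruff's default rule set does not object to this CUDA enum-style constant name. For reference, the `cuda_utils.get_device_attribute` helper presumably wraps a CUDA runtime query along these lines; the ctypes version below is a sketch under the assumption that `libcudart.so` is loadable, not how vLLM's compiled extension actually does it:

```python
import ctypes


def max_shared_memory_per_block_optin(gpu: int = 0) -> int:
    cudart = ctypes.CDLL("libcudart.so")
    value = ctypes.c_int()
    # C signature: cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr,
    #                                      int device);
    # 97 == cudaDevAttrMaxSharedMemoryPerBlockOptin (see the diff above).
    err = cudart.cudaDeviceGetAttribute(ctypes.byref(value), 97, gpu)
    if err != 0:
        raise RuntimeError(f"cudaDeviceGetAttribute failed with error {err}")
    return value.value


if __name__ == "__main__":
    print(max_shared_memory_per_block_optin(0))
```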
vllm/worker/worker.py

@@ -350,10 +350,7 @@ class Worker:
             self.cache_engine.copy(blocks_to_copy)
             issued_cache_op = True

-        if issued_cache_op:
-            cache_events = self.cache_events
-        else:
-            cache_events = None
+        cache_events = self.cache_events if issued_cache_op else None

         # If there is no input, we don't need to execute the model.
         if not seq_group_metadata_list:
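As in weight_utils.py, a four-line if/else that only selects a value is folded into one conditional expression, the kind of rewrite flake8-simplify's SIM108 rule suggests. A tiny sketch of the equivalence, using local stand-ins for the worker's attributes:

```python
# Stand-ins mirroring the names in the diff above (not the real Worker state).
issued_cache_op = True
_cache_events = ["swap_in_done", "copy_done"]

# Before: one assignment per branch.
if issued_cache_op:
    cache_events = _cache_events
else:
    cache_events = None

# After: a single conditional expression with identical behaviour.
cache_events = _cache_events if issued_cache_op else None
assert cache_events == _cache_events
```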