norm/vllm, commit 5ffc0d13 (unverified)
Authored Nov 20, 2023 by Simon Mo; committed via GitHub on Nov 20, 2023
Migrate linter from `pylint` to `ruff` (#1665)
Parent: 112627e8

Showing 5 changed files (of 45 in this commit) with 15 additions and 20 deletions.
Changed files on this page:

vllm/model_executor/weight_utils.py      +4  -6
vllm/transformers_utils/config.py        +1  -1
vllm/transformers_utils/configs/mpt.py   +8  -8
vllm/utils.py                            +1  -1
vllm/worker/worker.py                    +1  -4
vllm/model_executor/weight_utils.py

@@ -131,11 +131,9 @@ def prepare_hf_model_weights(
 ) -> Tuple[str, List[str], bool]:
     # Download model weights from huggingface.
     is_local = os.path.isdir(model_name_or_path)
-    if use_safetensors:
-        allow_patterns = ["*.safetensors"]
-    else:
-        # Some quantized models use .pt files for storing the weights.
-        allow_patterns = ["*.bin", "*.pt"]
+    # Some quantized models use .pt files for storing the weights.
+    allow_patterns = ["*.safetensors"] if use_safetensors else ["*.bin", "*.pt"]
     if not is_local:
         # Use file lock to prevent multiple processes from
         # downloading the same model weights at the same time.
@@ -242,7 +240,7 @@ def hf_model_weights_iterator(
     elif use_safetensors:
         for st_file in hf_weights_files:
             with safe_open(st_file, framework="pt") as f:
-                for name in f.keys():
+                for name in f:
                     param = f.get_tensor(name)
                     yield name, param
     else:
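The allow_patterns list built in the first hunk is the glob filter that decides which weight files get fetched from the Hugging Face Hub by the elided download code later in prepare_hf_model_weights. A minimal sketch of how such a filter is typically consumed, assuming huggingface_hub's snapshot_download is the underlying download call; the function name download_weights and its exact arguments are illustrative, not quoted from this file:

from huggingface_hub import snapshot_download

def download_weights(model_name_or_path: str, use_safetensors: bool) -> str:
    # Mirror the logic in the diff above: prefer .safetensors, otherwise
    # accept .bin and .pt (some quantized checkpoints ship .pt files).
    allow_patterns = ["*.safetensors"] if use_safetensors else ["*.bin", "*.pt"]
    # snapshot_download fetches only the files matching the allow_patterns
    # globs and returns the local directory that contains them.
    return snapshot_download(model_name_or_path, allow_patterns=allow_patterns)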
vllm/transformers_utils/config.py

@@ -2,7 +2,7 @@ from typing import Optional

 from transformers import AutoConfig, PretrainedConfig

-from vllm.transformers_utils.configs import *  # pylint: disable=wildcard-import
+from vllm.transformers_utils.configs import *

 _CONFIG_REGISTRY = {
     "aquila": AquilaConfig,
vllm/transformers_utils/configs/mpt.py

@@ -62,7 +62,6 @@ class MPTConfig(PretrainedConfig):
                  fc_type: str = 'torch',
                  verbose: Optional[int] = None,
                  **kwargs: Any):
-        # pylint: disable=line-too-long
         """The MPT configuration class.
         Args:
             d_model (int): The size of the embedding dimension of the model.
@@ -139,10 +138,10 @@ class MPTConfig(PretrainedConfig):
         self.init_config = init_config
         self.fc_type = fc_type
         if verbose is not None:
             warnings.warn(
                 DeprecationWarning(
                     'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.'
-                ))
+                ),
+                stacklevel=2)
         if 'name' in kwargs:
             del kwargs['name']
         if 'loss_fn' in kwargs:
@@ -150,8 +149,8 @@ class MPTConfig(PretrainedConfig):
         if self.attn_config.get('alibi', False):
             self.learned_pos_emb = False
             warnings.warn(
-                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`')
+                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`',
+                stacklevel=2)
         super().__init__(**kwargs)
         self._validate_config()
@@ -211,7 +210,8 @@ class MPTConfig(PretrainedConfig):
             )
         if not self.learned_pos_emb and (not self.attn_config['alibi']):
             warnings.warn(
-                'Positional information not being provided to the model.')
+                'Positional information not being provided to the model.',
+                stacklevel=2)
         if self.fc_type == 'te' or self.ffn_config['ffn_type'] == 'te_ln_mlp':
             try:
                 # pylint: disable=import-outside-toplevel
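The stacklevel=2 arguments added throughout mpt.py follow the usual guidance for library warnings, and are likely driven by ruff's flake8-bugbear rule B028 (warnings.warn without an explicit stacklevel): the warning is then attributed to the caller of the library function rather than to the warn call itself. A small self-contained illustration with a hypothetical function name, not code from this commit:

import warnings

def set_verbose(verbose=None):
    if verbose is not None:
        # stacklevel=2 makes the warning point at the caller of set_verbose(),
        # not at this line inside the library.
        warnings.warn("verbose is deprecated; use python_log_level instead",
                      DeprecationWarning,
                      stacklevel=2)

set_verbose(verbose=1)  # the DeprecationWarning is reported at this call site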
vllm/utils.py

@@ -30,7 +30,7 @@ class Counter:
 def get_max_shared_memory_bytes(gpu: int = 0) -> int:
     """Returns the maximum shared memory per thread block in bytes."""
     # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
-    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97  # pylint: disable=invalid-name
+    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
     max_shared_mem = cuda_utils.get_device_attribute(
         cudaDevAttrMaxSharedMemoryPerBlockOptin, gpu)
     return int(max_shared_mem)
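With pylint removed, per-line # pylint: disable=... directives such as the one dropped above no longer suppress anything, and ruff does not enforce pylint's invalid-name check by default, so the comment can simply go away. Where a per-line suppression is still needed, ruff reads flake8-style noqa comments instead; the rule code below is purely illustrative and not something this commit enforces:

# pylint-style directive; ruff does not read these:
l = 1  # pylint: disable=invalid-name

# flake8/ruff-style per-line suppression (E741 flags the ambiguous name 'l'):
l = 1  # noqa: E741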
vllm/worker/worker.py

@@ -350,10 +350,7 @@ class Worker:
             self.cache_engine.copy(blocks_to_copy)
             issued_cache_op = True

-        if issued_cache_op:
-            cache_events = self.cache_events
-        else:
-            cache_events = None
+        cache_events = self.cache_events if issued_cache_op else None

         # If there is no input, we don't need to execute the model.
         if not seq_group_metadata_list: