Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6a84164a
Unverified
Commit
6a84164a
authored
Mar 01, 2025
by
Jee Jee Li
Committed by
GitHub
Mar 01, 2025
Browse files
[Bugfix] Add file lock for ModelScope download (#14060)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
f64ffa8c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
40 additions
and
22 deletions
+40
-22
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+10
-5
vllm/model_executor/model_loader/loader.py
vllm/model_executor/model_loader/loader.py
+12
-8
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+4
-1
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer.py
+14
-8
No files found.
benchmarks/backend_request_func.py
View file @
6a84164a
...
...
@@ -14,6 +14,8 @@ from tqdm.asyncio import tqdm
from
transformers
import
(
AutoTokenizer
,
PreTrainedTokenizer
,
PreTrainedTokenizerFast
)
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
AIOHTTP_TIMEOUT
=
aiohttp
.
ClientTimeout
(
total
=
6
*
60
*
60
)
...
...
@@ -430,12 +432,15 @@ def get_model(pretrained_model_name_or_path: str) -> str:
if
os
.
getenv
(
'VLLM_USE_MODELSCOPE'
,
'False'
).
lower
()
==
'true'
:
from
modelscope
import
snapshot_download
model_path
=
snapshot_download
(
model_id
=
pretrained_model_name_or_path
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
])
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with
get_lock
(
pretrained_model_name_or_path
):
model_path
=
snapshot_download
(
model_id
=
pretrained_model_name_or_path
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
])
return
model_path
return
model_path
return
pretrained_model_name_or_path
...
...
vllm/model_executor/model_loader/loader.py
View file @
6a84164a
...
...
@@ -49,7 +49,7 @@ from vllm.model_executor.model_loader.utils import (ParamMapping,
from
vllm.model_executor.model_loader.weight_utils
import
(
download_safetensors_index_file_from_hf
,
download_weights_from_hf
,
filter_duplicate_safetensors_files
,
filter_files_not_needed_for_inference
,
get_gguf_extra_tensor_names
,
gguf_quant_weights_iterator
,
get_gguf_extra_tensor_names
,
get_lock
,
gguf_quant_weights_iterator
,
initialize_dummy_weights
,
np_cache_weights_iterator
,
pt_weights_iterator
,
runai_safetensors_weights_iterator
,
safetensors_weights_iterator
)
from
vllm.model_executor.utils
import
set_weight_attrs
...
...
@@ -235,13 +235,17 @@ class DefaultModelLoader(BaseModelLoader):
from
modelscope.hub.snapshot_download
import
snapshot_download
if
not
os
.
path
.
exists
(
model
):
model_path
=
snapshot_download
(
model_id
=
model
,
cache_dir
=
self
.
load_config
.
download_dir
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
revision
=
revision
,
ignore_file_pattern
=
self
.
load_config
.
ignore_patterns
,
)
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with
get_lock
(
model
,
self
.
load_config
.
download_dir
):
model_path
=
snapshot_download
(
model_id
=
model
,
cache_dir
=
self
.
load_config
.
download_dir
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
revision
=
revision
,
ignore_file_pattern
=
self
.
load_config
.
ignore_patterns
,
)
else
:
model_path
=
model
return
model_path
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
6a84164a
...
...
@@ -8,6 +8,7 @@ import os
import
tempfile
import
time
from
collections
import
defaultdict
from
pathlib
import
Path
from
typing
import
Any
,
Callable
,
Dict
,
Generator
,
List
,
Optional
,
Tuple
,
Union
import
filelock
...
...
@@ -67,8 +68,10 @@ class DisabledTqdm(tqdm):
super
().
__init__
(
*
args
,
**
kwargs
,
disable
=
True
)
def
get_lock
(
model_name_or_path
:
str
,
cache_dir
:
Optional
[
str
]
=
None
):
def
get_lock
(
model_name_or_path
:
Union
[
str
,
Path
],
cache_dir
:
Optional
[
str
]
=
None
):
lock_dir
=
cache_dir
or
temp_dir
model_name_or_path
=
str
(
model_name_or_path
)
os
.
makedirs
(
os
.
path
.
dirname
(
lock_dir
),
exist_ok
=
True
)
model_name
=
model_name_or_path
.
replace
(
"/"
,
"-"
)
hash_name
=
hashlib
.
sha256
(
model_name
.
encode
()).
hexdigest
()
...
...
vllm/transformers_utils/tokenizer.py
View file @
6a84164a
...
...
@@ -150,16 +150,22 @@ def get_tokenizer(
# pylint: disable=C.
from
modelscope.hub.snapshot_download
import
snapshot_download
# avoid circular import
from
vllm.model_executor.model_loader.weight_utils
import
get_lock
# Only set the tokenizer here, model will be downloaded on the workers.
if
not
os
.
path
.
exists
(
tokenizer_name
):
tokenizer_path
=
snapshot_download
(
model_id
=
tokenizer_name
,
cache_dir
=
download_dir
,
revision
=
revision
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
])
tokenizer_name
=
tokenizer_path
# Use file lock to prevent multiple processes from
# downloading the same file at the same time.
with
get_lock
(
tokenizer_name
,
download_dir
):
tokenizer_path
=
snapshot_download
(
model_id
=
tokenizer_name
,
cache_dir
=
download_dir
,
revision
=
revision
,
local_files_only
=
huggingface_hub
.
constants
.
HF_HUB_OFFLINE
,
# Ignore weights - we only need the tokenizer.
ignore_file_pattern
=
[
".*.pt"
,
".*.safetensors"
,
".*.bin"
])
tokenizer_name
=
tokenizer_path
if
tokenizer_mode
==
"slow"
:
if
kwargs
.
get
(
"use_fast"
,
False
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment