Unverified commit 6a84164a, authored by Jee Jee Li and committed by GitHub
Browse files

[Bugfix] Add file lock for ModelScope download (#14060)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent f64ffa8c
...@@ -14,6 +14,8 @@ from tqdm.asyncio import tqdm ...@@ -14,6 +14,8 @@ from tqdm.asyncio import tqdm
from transformers import (AutoTokenizer, PreTrainedTokenizer, from transformers import (AutoTokenizer, PreTrainedTokenizer,
PreTrainedTokenizerFast) PreTrainedTokenizerFast)
from vllm.model_executor.model_loader.weight_utils import get_lock
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60) AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
...@@ -430,6 +432,9 @@ def get_model(pretrained_model_name_or_path: str) -> str: ...@@ -430,6 +432,9 @@ def get_model(pretrained_model_name_or_path: str) -> str:
if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true': if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
from modelscope import snapshot_download from modelscope import snapshot_download
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with get_lock(pretrained_model_name_or_path):
model_path = snapshot_download( model_path = snapshot_download(
model_id=pretrained_model_name_or_path, model_id=pretrained_model_name_or_path,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE, local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
......
...@@ -49,7 +49,7 @@ from vllm.model_executor.model_loader.utils import (ParamMapping, ...@@ -49,7 +49,7 @@ from vllm.model_executor.model_loader.utils import (ParamMapping,
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
download_safetensors_index_file_from_hf, download_weights_from_hf, download_safetensors_index_file_from_hf, download_weights_from_hf,
filter_duplicate_safetensors_files, filter_files_not_needed_for_inference, filter_duplicate_safetensors_files, filter_files_not_needed_for_inference,
get_gguf_extra_tensor_names, gguf_quant_weights_iterator, get_gguf_extra_tensor_names, get_lock, gguf_quant_weights_iterator,
initialize_dummy_weights, np_cache_weights_iterator, pt_weights_iterator, initialize_dummy_weights, np_cache_weights_iterator, pt_weights_iterator,
runai_safetensors_weights_iterator, safetensors_weights_iterator) runai_safetensors_weights_iterator, safetensors_weights_iterator)
from vllm.model_executor.utils import set_weight_attrs from vllm.model_executor.utils import set_weight_attrs
...@@ -235,10 +235,14 @@ class DefaultModelLoader(BaseModelLoader): ...@@ -235,10 +235,14 @@ class DefaultModelLoader(BaseModelLoader):
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
if not os.path.exists(model): if not os.path.exists(model):
# Use file lock to prevent multiple processes from
# downloading the same model weights at the same time.
with get_lock(model, self.load_config.download_dir):
model_path = snapshot_download( model_path = snapshot_download(
model_id=model, model_id=model,
cache_dir=self.load_config.download_dir, cache_dir=self.load_config.download_dir,
local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE, local_files_only=huggingface_hub.constants.
HF_HUB_OFFLINE,
revision=revision, revision=revision,
ignore_file_pattern=self.load_config.ignore_patterns, ignore_file_pattern=self.load_config.ignore_patterns,
) )
......
...@@ -8,6 +8,7 @@ import os ...@@ -8,6 +8,7 @@ import os
import tempfile import tempfile
import time import time
from collections import defaultdict from collections import defaultdict
from pathlib import Path
from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
import filelock import filelock
...@@ -67,8 +68,10 @@ class DisabledTqdm(tqdm): ...@@ -67,8 +68,10 @@ class DisabledTqdm(tqdm):
super().__init__(*args, **kwargs, disable=True) super().__init__(*args, **kwargs, disable=True)
def get_lock(model_name_or_path: str, cache_dir: Optional[str] = None): def get_lock(model_name_or_path: Union[str, Path],
cache_dir: Optional[str] = None):
lock_dir = cache_dir or temp_dir lock_dir = cache_dir or temp_dir
model_name_or_path = str(model_name_or_path)
os.makedirs(os.path.dirname(lock_dir), exist_ok=True) os.makedirs(os.path.dirname(lock_dir), exist_ok=True)
model_name = model_name_or_path.replace("/", "-") model_name = model_name_or_path.replace("/", "-")
hash_name = hashlib.sha256(model_name.encode()).hexdigest() hash_name = hashlib.sha256(model_name.encode()).hexdigest()
......
...@@ -150,8 +150,14 @@ def get_tokenizer( ...@@ -150,8 +150,14 @@ def get_tokenizer(
# pylint: disable=C. # pylint: disable=C.
from modelscope.hub.snapshot_download import snapshot_download from modelscope.hub.snapshot_download import snapshot_download
# avoid circular import
from vllm.model_executor.model_loader.weight_utils import get_lock
# Only set the tokenizer here, model will be downloaded on the workers. # Only set the tokenizer here, model will be downloaded on the workers.
if not os.path.exists(tokenizer_name): if not os.path.exists(tokenizer_name):
# Use file lock to prevent multiple processes from
# downloading the same file at the same time.
with get_lock(tokenizer_name, download_dir):
tokenizer_path = snapshot_download( tokenizer_path = snapshot_download(
model_id=tokenizer_name, model_id=tokenizer_name,
cache_dir=download_dir, cache_dir=download_dir,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment