Unverified Commit 4377b1ae authored by pwschuurman's avatar pwschuurman Committed by GitHub
Browse files

[Bugfix] Update Run:AI Model Streamer Loading Integration (#23845)


Signed-off-by: default avatarOmer Dayan (SW-GPU) <omer@run.ai>
Signed-off-by: default avatarPeter Schuurman <psch@google.com>
Co-authored-by: default avatarOmer Dayan (SW-GPU) <omer@run.ai>
Co-authored-by: default avatarCyrus Leung <tlleungac@connect.ust.hk>
parent 009d689b
...@@ -656,8 +656,10 @@ setup( ...@@ -656,8 +656,10 @@ setup(
"bench": ["pandas", "datasets"], "bench": ["pandas", "datasets"],
"tensorizer": ["tensorizer==2.10.1"], "tensorizer": ["tensorizer==2.10.1"],
"fastsafetensors": ["fastsafetensors >= 0.1.10"], "fastsafetensors": ["fastsafetensors >= 0.1.10"],
"runai": "runai": [
["runai-model-streamer >= 0.13.3", "runai-model-streamer-s3", "boto3"], "runai-model-streamer >= 0.14.0", "runai-model-streamer-gcs",
"google-cloud-storage", "runai-model-streamer-s3", "boto3"
],
"audio": ["librosa", "soundfile", "audio": ["librosa", "soundfile",
"mistral_common[audio]"], # Required for audio processing "mistral_common[audio]"], # Required for audio processing
"video": [], # Kept for backwards compatibility "video": [], # Kept for backwards compatibility
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import glob
import os
import tempfile
import huggingface_hub.constants
from vllm.model_executor.model_loader.weight_utils import (
download_weights_from_hf)
from vllm.transformers_utils.runai_utils import (is_runai_obj_uri,
list_safetensors)
def test_is_runai_obj_uri():
assert is_runai_obj_uri("gs://some-gcs-bucket/path")
assert is_runai_obj_uri("s3://some-s3-bucket/path")
assert not is_runai_obj_uri("nfs://some-nfs-path")
def test_runai_list_safetensors_local():
with tempfile.TemporaryDirectory() as tmpdir:
huggingface_hub.constants.HF_HUB_OFFLINE = False
download_weights_from_hf("openai-community/gpt2",
allow_patterns=["*.safetensors", "*.json"],
cache_dir=tmpdir)
safetensors = glob.glob(f"{tmpdir}/**/*.safetensors", recursive=True)
assert len(safetensors) > 0
parentdir = [
os.path.dirname(safetensor) for safetensor in safetensors
][0]
files = list_safetensors(parentdir)
assert len(safetensors) == len(files)
if __name__ == "__main__":
test_is_runai_obj_uri()
test_runai_list_safetensors_local()
...@@ -48,8 +48,9 @@ from vllm.transformers_utils.config import ( ...@@ -48,8 +48,9 @@ from vllm.transformers_utils.config import (
is_interleaved, maybe_override_with_speculators_target_model, is_interleaved, maybe_override_with_speculators_target_model,
try_get_generation_config, try_get_safetensors_metadata, try_get_generation_config, try_get_safetensors_metadata,
try_get_tokenizer_config, uses_mrope) try_get_tokenizer_config, uses_mrope)
from vllm.transformers_utils.s3_utils import S3Model from vllm.transformers_utils.runai_utils import (ObjectStorageModel,
from vllm.transformers_utils.utils import is_s3, maybe_model_redirect is_runai_obj_uri)
from vllm.transformers_utils.utils import maybe_model_redirect
from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS, from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS,
STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType,
LazyLoader, common_broadcastable_dtype, random_uuid) LazyLoader, common_broadcastable_dtype, random_uuid)
...@@ -556,15 +557,6 @@ class ModelConfig: ...@@ -556,15 +557,6 @@ class ModelConfig:
"affect the random state of the Python process that " "affect the random state of the Python process that "
"launched vLLM.", self.seed) "launched vLLM.", self.seed)
if self.runner != "draft":
# If we're not running the draft model, check for speculators config
# If speculators config, set model / tokenizer to be target model
self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501
model=self.model,
tokenizer=self.tokenizer,
revision=self.revision,
trust_remote_code=self.trust_remote_code)
# Keep set served_model_name before maybe_model_redirect(self.model) # Keep set served_model_name before maybe_model_redirect(self.model)
self.served_model_name = get_served_model_name(self.model, self.served_model_name = get_served_model_name(self.model,
self.served_model_name) self.served_model_name)
...@@ -603,7 +595,16 @@ class ModelConfig: ...@@ -603,7 +595,16 @@ class ModelConfig:
f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") f"'Please instead use `--hf-overrides '{hf_overrides_str}'`")
warnings.warn(DeprecationWarning(msg), stacklevel=2) warnings.warn(DeprecationWarning(msg), stacklevel=2)
self.maybe_pull_model_tokenizer_for_s3(self.model, self.tokenizer) self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
if self.runner != "draft":
# If we're not running the draft model, check for speculators config
# If speculators config, set model / tokenizer to be target model
self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501
model=self.model,
tokenizer=self.tokenizer,
revision=self.revision,
trust_remote_code=self.trust_remote_code)
if (backend := envs.VLLM_ATTENTION_BACKEND if (backend := envs.VLLM_ATTENTION_BACKEND
) and backend == "FLASHINFER" and find_spec("flashinfer") is None: ) and backend == "FLASHINFER" and find_spec("flashinfer") is None:
...@@ -832,41 +833,42 @@ class ModelConfig: ...@@ -832,41 +833,42 @@ class ModelConfig:
"""The architecture vllm actually used.""" """The architecture vllm actually used."""
return self._architecture return self._architecture
def maybe_pull_model_tokenizer_for_s3(self, model: str, def maybe_pull_model_tokenizer_for_runai(self, model: str,
tokenizer: str) -> None: tokenizer: str) -> None:
"""Pull model/tokenizer from S3 to temporary directory when needed. """Pull model/tokenizer from Object Storage to temporary
directory when needed.
Args: Args:
model: Model name or path model: Model name or path
tokenizer: Tokenizer name or path tokenizer: Tokenizer name or path
""" """
if not (is_s3(model) or is_s3(tokenizer)): if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)):
return return
if is_s3(model): if is_runai_obj_uri(model):
s3_model = S3Model() object_storage_model = ObjectStorageModel()
s3_model.pull_files(model, object_storage_model.pull_files(
allow_pattern=["*.model", "*.py", "*.json"]) model, allow_pattern=["*.model", "*.py", "*.json"])
self.model_weights = model self.model_weights = model
self.model = s3_model.dir self.model = object_storage_model.dir
# If tokenizer is same as model, download to same directory # If tokenizer is same as model, download to same directory
if model == tokenizer: if model == tokenizer:
s3_model.pull_files(model, object_storage_model.pull_files(model,
ignore_pattern=[ ignore_pattern=[
"*.pt", "*.safetensors", "*.bin", "*.pt", "*.safetensors",
"*.tensors" "*.bin", "*.tensors"
]) ])
self.tokenizer = s3_model.dir self.tokenizer = object_storage_model.dir
return return
# Only download tokenizer if needed and not already handled # Only download tokenizer if needed and not already handled
if is_s3(tokenizer): if is_runai_obj_uri(tokenizer):
s3_tokenizer = S3Model() object_storage_tokenizer = ObjectStorageModel()
s3_tokenizer.pull_files( object_storage_tokenizer.pull_files(
model, model,
ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors"]) ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors"])
self.tokenizer = s3_tokenizer.dir self.tokenizer = object_storage_tokenizer.dir
def _init_multimodal_config(self) -> Optional["MultiModalConfig"]: def _init_multimodal_config(self) -> Optional["MultiModalConfig"]:
if self._model_info.supports_multimodal: if self._model_info.supports_multimodal:
......
...@@ -1053,9 +1053,10 @@ class EngineArgs: ...@@ -1053,9 +1053,10 @@ class EngineArgs:
SpeculatorsConfig) SpeculatorsConfig)
if self.speculative_config is None: if self.speculative_config is None:
hf_config = get_config(self.hf_config_path or self.model, hf_config = get_config(
self.trust_remote_code, self.revision, self.hf_config_path or target_model_config.model,
self.code_revision, self.config_format) self.trust_remote_code, self.revision, self.code_revision,
self.config_format)
# if loading a SpeculatorsConfig, load the speculative_config # if loading a SpeculatorsConfig, load the speculative_config
# details from the config directly # details from the config directly
...@@ -1065,7 +1066,7 @@ class EngineArgs: ...@@ -1065,7 +1066,7 @@ class EngineArgs:
self.speculative_config = {} self.speculative_config = {}
self.speculative_config[ self.speculative_config[
"num_speculative_tokens"] = hf_config.num_lookahead_tokens "num_speculative_tokens"] = hf_config.num_lookahead_tokens
self.speculative_config["model"] = self.model self.speculative_config["model"] = target_model_config.model
self.speculative_config["method"] = hf_config.method self.speculative_config["method"] = hf_config.method
else: else:
return None return None
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa: SIM117 # ruff: noqa: SIM117
import glob
import os import os
from collections.abc import Generator from collections.abc import Generator
from typing import Optional from typing import Optional
...@@ -15,8 +14,8 @@ from vllm.model_executor.model_loader.base_loader import BaseModelLoader ...@@ -15,8 +14,8 @@ from vllm.model_executor.model_loader.base_loader import BaseModelLoader
from vllm.model_executor.model_loader.weight_utils import ( from vllm.model_executor.model_loader.weight_utils import (
download_safetensors_index_file_from_hf, download_weights_from_hf, download_safetensors_index_file_from_hf, download_weights_from_hf,
runai_safetensors_weights_iterator) runai_safetensors_weights_iterator)
from vllm.transformers_utils.s3_utils import glob as s3_glob from vllm.transformers_utils.runai_utils import (is_runai_obj_uri,
from vllm.transformers_utils.utils import is_s3 list_safetensors)
class RunaiModelStreamerLoader(BaseModelLoader): class RunaiModelStreamerLoader(BaseModelLoader):
...@@ -53,27 +52,22 @@ class RunaiModelStreamerLoader(BaseModelLoader): ...@@ -53,27 +52,22 @@ class RunaiModelStreamerLoader(BaseModelLoader):
If the model is not local, it will be downloaded.""" If the model is not local, it will be downloaded."""
is_s3_path = is_s3(model_name_or_path) is_object_storage_path = is_runai_obj_uri(model_name_or_path)
is_local = os.path.isdir(model_name_or_path) is_local = os.path.isdir(model_name_or_path)
safetensors_pattern = "*.safetensors" safetensors_pattern = "*.safetensors"
index_file = SAFE_WEIGHTS_INDEX_NAME index_file = SAFE_WEIGHTS_INDEX_NAME
hf_folder = (model_name_or_path if hf_folder = (model_name_or_path if (is_local or is_object_storage_path)
(is_local or is_s3_path) else download_weights_from_hf( else download_weights_from_hf(
model_name_or_path, model_name_or_path,
self.load_config.download_dir, self.load_config.download_dir,
[safetensors_pattern], [safetensors_pattern],
revision, revision,
ignore_patterns=self.load_config.ignore_patterns, ignore_patterns=self.load_config.ignore_patterns,
)) ))
if is_s3_path: hf_weights_files = list_safetensors(path=hf_folder)
hf_weights_files = s3_glob(path=hf_folder,
allow_pattern=[safetensors_pattern]) if not is_local and not is_object_storage_path:
else:
hf_weights_files = glob.glob(
os.path.join(hf_folder, safetensors_pattern))
if not is_local and not is_s3_path:
download_safetensors_index_file_from_hf( download_safetensors_index_file_from_hf(
model_name_or_path, index_file, self.load_config.download_dir, model_name_or_path, index_file, self.load_config.download_dir,
revision) revision)
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
import shutil
import signal
import tempfile
from typing import Optional
from vllm.logger import init_logger
from vllm.utils import PlaceholderModule
logger = init_logger(__name__)
SUPPORTED_SCHEMES = ['s3://', 'gs://']
try:
from runai_model_streamer import list_safetensors as runai_list_safetensors
from runai_model_streamer import pull_files as runai_pull_files
except (ImportError, OSError):
# see https://github.com/run-ai/runai-model-streamer/issues/26
# OSError will be raised on arm64 platform
runai_model_streamer = PlaceholderModule(
"runai_model_streamer") # type: ignore[assignment]
runai_pull_files = runai_model_streamer.placeholder_attr("pull_files")
runai_list_safetensors = runai_model_streamer.placeholder_attr(
"list_safetensors")
def list_safetensors(path: str = "") -> list[str]:
"""
List full file names from object path and filter by allow pattern.
Args:
path: The object storage path to list from.
allow_pattern: A list of patterns of which files to pull.
Returns:
list[str]: List of full object storage paths allowed by the pattern
"""
return runai_list_safetensors(path)
def is_runai_obj_uri(model_or_path: str) -> bool:
return model_or_path.lower().startswith(tuple(SUPPORTED_SCHEMES))
class ObjectStorageModel:
"""
A class representing an ObjectStorage model mirrored into a
temporary directory.
Attributes:
dir: The temporary created directory.
Methods:
pull_files(): Pull model from object storage to the temporary
directory.
"""
def __init__(self) -> None:
for sig in (signal.SIGINT, signal.SIGTERM):
existing_handler = signal.getsignal(sig)
signal.signal(sig, self._close_by_signal(existing_handler))
self.dir = tempfile.mkdtemp()
def __del__(self):
self._close()
def _close(self) -> None:
if os.path.exists(self.dir):
shutil.rmtree(self.dir)
def _close_by_signal(self, existing_handler=None):
def new_handler(signum, frame):
self._close()
if existing_handler:
existing_handler(signum, frame)
return new_handler
def pull_files(self,
model_path: str = "",
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None) -> None:
"""
Pull files from object storage into the temporary directory.
Args:
model_path: The object storage path of the model.
allow_pattern: A list of patterns of which files to pull.
ignore_pattern: A list of patterns of which files not to pull.
"""
if not model_path.endswith("/"):
model_path = model_path + "/"
runai_pull_files(model_path, self.dir, allow_pattern, ignore_pattern)
...@@ -2,11 +2,6 @@ ...@@ -2,11 +2,6 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import fnmatch import fnmatch
import os
import shutil
import signal
import tempfile
from pathlib import Path
from typing import Optional from typing import Optional
from vllm.utils import PlaceholderModule from vllm.utils import PlaceholderModule
...@@ -93,70 +88,3 @@ def list_files( ...@@ -93,70 +88,3 @@ def list_files(
paths = _filter_ignore(paths, ignore_pattern) paths = _filter_ignore(paths, ignore_pattern)
return bucket_name, prefix, paths return bucket_name, prefix, paths
class S3Model:
"""
A class representing a S3 model mirrored into a temporary directory.
Attributes:
s3: S3 client.
dir: The temporary created directory.
Methods:
pull_files(): Pull model from S3 to the temporary directory.
"""
def __init__(self) -> None:
self.s3 = boto3.client('s3')
for sig in (signal.SIGINT, signal.SIGTERM):
existing_handler = signal.getsignal(sig)
signal.signal(sig, self._close_by_signal(existing_handler))
self.dir = tempfile.mkdtemp()
def __del__(self):
self._close()
def _close(self) -> None:
if os.path.exists(self.dir):
shutil.rmtree(self.dir)
def _close_by_signal(self, existing_handler=None):
def new_handler(signum, frame):
self._close()
if existing_handler:
existing_handler(signum, frame)
return new_handler
def pull_files(self,
s3_model_path: str = "",
allow_pattern: Optional[list[str]] = None,
ignore_pattern: Optional[list[str]] = None) -> None:
"""
Pull files from S3 storage into the temporary directory.
Args:
s3_model_path: The S3 path of the model.
allow_pattern: A list of patterns of which files to pull.
ignore_pattern: A list of patterns of which files not to pull.
"""
if not s3_model_path.endswith("/"):
s3_model_path = s3_model_path + "/"
bucket_name, base_dir, files = list_files(self.s3, s3_model_path,
allow_pattern,
ignore_pattern)
if len(files) == 0:
return
for file in files:
destination_file = os.path.join(
self.dir,
file.removeprefix(base_dir).lstrip("/"))
local_dir = Path(destination_file).parent
os.makedirs(local_dir, exist_ok=True)
self.s3.download_file(bucket_name, file, destination_file)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment