[Misc] Use model_redirect to redirect the model name to a local folder. (#14116)

3f532cb6 · wang.yuqi · GitHub · e6c9053f · 3f532cb6 · 3f532cb6
Unverified Commit 3f532cb6 authored Mar 27, 2025 by wang.yuqi Committed by GitHub Mar 27, 2025
Show whitespace changes
Inline Side-by-side

Showing with 50 additions and 3 deletions

vllm/config.py vllm/config.py +7 -3

vllm/envs.py vllm/envs.py +5 -0

vllm/transformers_utils/utils.py vllm/transformers_utils/utils.py +38 -0

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -38,7 +38,7 @@ from vllm.transformers_utils.config import (
    get_sentence_transformer_tokenizer_config, is_encoder_decoder,
    try_get_generation_config, uses_mrope)
 from vllm.transformers_utils.s3_utils import S3Model
-from vllm.transformers_utils.utils import is_s3
+from vllm.transformers_utils.utils import is_s3, maybe_model_redirect
 from vllm.utils import (GiB_bytes, LayerBlockType, cuda_device_count_stateless,
                        get_cpu_memory, random_uuid, resolve_obj_by_qualname)

@@ -266,9 +266,13 @@ class ModelConfig:
        override_generation_config: Optional[dict[str, Any]] = None,
        model_impl: Union[str, ModelImpl] = ModelImpl.AUTO,
    ) -> None:
-        self.model = model
+        self.model = maybe_model_redirect(model)
+        self.tokenizer = maybe_model_redirect(tokenizer)
+
        self.hf_config_path = hf_config_path
-        self.tokenizer = tokenizer
+        if isinstance(hf_config_path, str):
+            self.hf_config_path = maybe_model_redirect(hf_config_path)
+
        self.tokenizer_mode = tokenizer_mode
        self.trust_remote_code = trust_remote_code
        self.allowed_local_media_path = allowed_local_media_path

--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -22,6 +22,7 @@ if TYPE_CHECKING:
    S3_ACCESS_KEY_ID: Optional[str] = None
    S3_SECRET_ACCESS_KEY: Optional[str] = None
    S3_ENDPOINT_URL: Optional[str] = None
+    VLLM_MODEL_REDIRECT_PATH: Optional[str] = None
    VLLM_CACHE_ROOT: str = os.path.expanduser("~/.cache/vllm")
    VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm")
    VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai"
@@ -635,6 +636,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_CI_USE_S3":
    lambda: os.environ.get("VLLM_CI_USE_S3", "0") == "1",

+    # Path to a tab-separated file that maps model names to local folders.
+    "VLLM_MODEL_REDIRECT_PATH":
+    lambda: os.environ.get("VLLM_MODEL_REDIRECT_PATH", None),
+
    # Whether to use atomicAdd reduce in gptq/awq marlin kernel.
    "VLLM_MARLIN_USE_ATOMIC_ADD":
    lambda: os.environ.get("VLLM_MARLIN_USE_ATOMIC_ADD", "0") == "1",

--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
 # SPDX-License-Identifier: Apache-2.0

+from functools import cache
 from os import PathLike
 from pathlib import Path
 from typing import List, Optional, Union

+from vllm.envs import VLLM_MODEL_REDIRECT_PATH
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+

 def is_s3(model_or_path: str) -> bool:
    return model_or_path.lower().startswith('s3://')
@@ -38,3 +44,35 @@ def modelscope_list_repo_files(
        if file['Type'] == 'blob'
    ]
    return files
+
+
+@cache
+def maybe_model_redirect(model: str) -> str:
+    """
+    Use model_redirect to redirect the model name to a local folder.
+
+    The redirect file holds one "<model name><TAB><local folder>" per line.
+
+    :param model: hf model name
+    :return: maybe redirect to a local folder
+    """
+    model_redirect_path = VLLM_MODEL_REDIRECT_PATH
+
+    # No redirect file configured, or it is missing: keep the name as is.
+    if not model_redirect_path or not Path(model_redirect_path).exists():
+        return model
+
+    with open(model_redirect_path) as f:
+        for line in f:
+            fields = line.split("\t")
+            if len(fields) != 2:
+                # Not exactly "<name><TAB><folder>": ignore this line.
+                continue
+            model_name, redirect_name = fields
+            if model == model_name:
+                redirect_name = redirect_name.strip()
+                logger.info("model redirect: [ %s ] -> [ %s ]", model,
+                            redirect_name)
+                return redirect_name
+
+    return model