[Platform] add pre_register_and_update function (#12432)

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

[Platform] add pre_register_and_update function (#12432)
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
2e3b969e · wangxiyuan · GitHub · da317197 · 2e3b969e · 2e3b969e
Unverified Commit 2e3b969e authored Feb 11, 2025 by wangxiyuan Committed by GitHub Feb 11, 2025
Show whitespace changes
Inline Side-by-side

Showing with 41 additions and 1 deletion

vllm/config.py vllm/config.py +2 -1

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +21 -0

vllm/platforms/interface.py vllm/platforms/interface.py +18 -0

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -3057,7 +3057,8 @@ class VllmConfig:
    kv_transfer_config: KVTransferConfig = field(default=None,
                                                 init=True)  # type: ignore
    # some opaque config, only used to provide additional information
-    # for the hash computation, mainly used for testing and debugging.
+    # for the hash computation, mainly used for testing, debugging or
+    # out-of-tree config registration.
    additional_config: SupportsHash = field(default=None,
                                            init=True)  # type: ignore
    instance_id: str = ""

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -20,6 +20,7 @@ from vllm.config import (CacheConfig, CompilationConfig, ConfigFormat,
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.plugins import load_general_plugins
 from vllm.transformers_utils.utils import check_gguf_file
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import FlexibleArgumentParser, StoreBoolean
@@ -203,6 +204,8 @@ class EngineArgs:
    calculate_kv_scales: Optional[bool] = None
+    additional_config: Optional[Dict[str, Any]] = None
    def __post_init__(self):
        if not self.tokenizer:
            self.tokenizer = self.model
@@ -984,6 +987,14 @@ class EngineArgs:
            'be loaded from the model checkpoint if available. '
            'Otherwise, the scales will default to 1.0.')
+        parser.add_argument(
+            "--additional-config",
+            type=json.loads,
+            default=None,
+            help="Additional config for specified platform in JSON format. "
+            "Different platforms may support different configs. Make sure the "
+            "configs are valid for the platform you are using. The input format"
+            " is like '{\"config_key\":\"config_value\"}'")
        return parser
    @classmethod
@@ -1044,6 +1055,9 @@ class EngineArgs:
    def create_engine_config(self,
                             usage_context: Optional[UsageContext] = None
                             ) -> VllmConfig:
+        from vllm.platforms import current_platform
+        current_platform.pre_register_and_update()
        if envs.VLLM_USE_V1:
            self._override_v1_engine_args(usage_context)
@@ -1287,6 +1301,7 @@ class EngineArgs:
            prompt_adapter_config=prompt_adapter_config,
            compilation_config=self.compilation_config,
            kv_transfer_config=self.kv_transfer_config,
+            additional_config=self.additional_config,
        )
        if envs.VLLM_USE_V1:
@@ -1347,6 +1362,12 @@ class AsyncEngineArgs(EngineArgs):
        parser.add_argument('--disable-log-requests',
                            action='store_true',
                            help='Disable logging requests.')
+        # Initialize plugins so they can update the parser. For example, a
+        # plugin may add a new kind of quantization method to the --quantization
+        # argument or a new device to the --device argument.
+        load_general_plugins()
+        from vllm.platforms import current_platform
+        current_platform.pre_register_and_update(parser)
        return parser

--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -13,8 +13,10 @@ from vllm.logger import init_logger
 if TYPE_CHECKING:
    from vllm.config import VllmConfig
+    from vllm.utils import FlexibleArgumentParser
 else:
    VllmConfig = None
+    FlexibleArgumentParser = None
 logger = init_logger(__name__)
@@ -223,6 +225,22 @@ class Platform:
            np.random.seed(seed)
            torch.manual_seed(seed)
+    @classmethod
+    def pre_register_and_update(cls,
+                                parser: Optional[FlexibleArgumentParser] = None
+                                ) -> None:
+        """
+        Do some pre-registration or update action for the current platform.
+        This function is called before global VllmConfig is initialized or cli
+        arguments are parsed. It's used for out-of-tree platforms to register or
+        update the configuration.
+        For example, the out-of-tree quantization config can be imported and
+        registered here dynamically.
+        """
+        pass
    @classmethod
    def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
        """