[Doc] add load_format items in docs (#14804)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>

[Doc] add load_format items in docs (#14804)
Signed-off-by: wwl2755 <wangwenlong2755@gmail.com>
1c2bec0f · wwl2755 · GitHub · ec870fba · 1c2bec0f · 1c2bec0f
Unverified Commit 1c2bec0f authored Mar 22, 2025 by wwl2755 Committed by GitHub Mar 21, 2025
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 2 deletions

vllm/config.py vllm/config.py +6 -0

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +8 -2

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1294,6 +1294,12 @@ class LoadConfig:
            "tensorizer" will use CoreWeave's tensorizer library for
                fast weight loading.
            "bitsandbytes" will load nf4 type weights.
+            "sharded_state" will load weights from pre-sharded checkpoint files,
+                supporting efficient loading of tensor-parallel models.
+            "gguf" will load weights from GGUF format files.
+            "mistral" will load weights from consolidated safetensors files used
+                by Mistral models.
+            "runai_streamer" will load Safetensors weights using Run:ai Model Streamer.
        model_loader_extra_config: The extra config for the model loader.
        ignore_patterns: The list of patterns to ignore when loading the model.
            Default to "original/**/*" to avoid repeated loading of llama's

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -339,9 +339,15 @@ class EngineArgs:
            'CoreWeave. See the Tensorize vLLM Model script in the Examples '
            'section for more information.\n'
            '* "runai_streamer" will load the Safetensors weights using Run:ai'
-            'Model Streamer \n'
+            ' Model Streamer.\n'
            '* "bitsandbytes" will load the weights using bitsandbytes '
-            'quantization.\n')
+            'quantization.\n'
+            '* "sharded_state" will load weights from pre-sharded checkpoint '
+            'files, supporting efficient loading of tensor-parallel models.\n'
+            '* "gguf" will load weights from GGUF format files (details '
+            'specified in https://github.com/ggml-org/ggml/blob/master/docs/gguf.md).\n'
+            '* "mistral" will load weights from consolidated safetensors files '
+            'used by Mistral models.\n')
        parser.add_argument(
            '--config-format',
            default=EngineArgs.config_format,