[Bugfix] Use LoadFormat values for `vllm serve --load-format` (#7784)

15310b51 · Michael Goin · GitHub · 57792ed4 · 15310b51
Unverified commit 15310b51, authored Aug 22, 2024 by Michael Goin; committed by GitHub on Aug 22, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 6 deletions

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +3 -6

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -9,8 +9,8 @@ import torch

 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
-                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
-                         ObservabilityConfig, ParallelConfig,
+                         EngineConfig, LoadConfig, LoadFormat, LoRAConfig,
+                         ModelConfig, ObservabilityConfig, ParallelConfig,
                         PromptAdapterConfig, SchedulerConfig,
                         SpeculativeConfig, TokenizerPoolConfig)
 from vllm.executor.executor_base import ExecutorBase
@@ -214,10 +214,7 @@ class EngineArgs:
            '--load-format',
            type=str,
            default=EngineArgs.load_format,
-            choices=[
-                'auto', 'pt', 'safetensors', 'npcache', 'dummy', 'tensorizer',
-                'bitsandbytes'
-            ],
+            choices=[f.value for f in LoadFormat],
            help='The format of the model weights to load.\n\n'
            '* "auto" will try to load the weights in the safetensors format '
            'and fall back to the pytorch bin format if safetensors format '