[Bugfix] fix lora_dtype value type in arg_utils.py - part 2 (#5428)

dad961ef · Ali Panahi · GitHub · 3ac50b47 · dad961ef
Unverified commit dad961ef, authored Aug 19, 2024 by Ali Panahi; committed by GitHub on Aug 19, 2024.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 3 additions and 1 deletion

vllm/engine/arg_utils.py vllm/engine/arg_utils.py +3 -1

No files found.
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -5,6 +5,8 @@ from dataclasses import dataclass
 from typing import (TYPE_CHECKING, Dict, List, Mapping, Optional, Tuple, Type,
                    Union)
 
+import torch
+
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
                         EngineConfig, LoadConfig, LoRAConfig, ModelConfig,
@@ -113,7 +115,7 @@ class EngineArgs:
    fully_sharded_loras: bool = False
    lora_extra_vocab_size: int = 256
    long_lora_scaling_factors: Optional[Tuple[float]] = None
-    lora_dtype: str = 'auto'
+    lora_dtype: Optional[Union[str, torch.dtype]] = 'auto'
    max_cpu_loras: Optional[int] = None
    device: str = 'auto'
    num_scheduler_steps: int = 1