Auto-detect device if not specified in server arguments. (#4423)

bf63ee54 · vikram singh shekhawat · GitHub · 22c96f78 · bf63ee54
Unverified commit bf63ee54, authored Mar 16, 2025 by vikram singh shekhawat; committed by GitHub on Mar 15, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

python/sglang/srt/server_args.py python/sglang/srt/server_args.py +7 -4

No files found.
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -24,6 +24,7 @@ from sglang.srt.hf_transformers_utils import check_gguf_file
 from sglang.srt.reasoning_parser import ReasoningParser
 from sglang.srt.utils import (
    get_amdgpu_memory_capacity,
+    get_device,
    get_hpu_memory_capacity,
    get_nvgpu_memory_capacity,
    is_cuda,
@@ -52,7 +53,7 @@ class ServerArgs:
    quantization: Optional[str] = None
    quantization_param_path: nullable_str = None
    context_length: Optional[int] = None
-    device: str = "cuda"
+    device: Optional[str] = None
    served_model_name: Optional[str] = None
    chat_template: Optional[str] = None
    is_embedding: bool = False
@@ -185,6 +186,9 @@ class ServerArgs:
        if self.tokenizer_path is None:
            self.tokenizer_path = self.model_path
+        if self.device is None:
+            self.device = get_device()
        if self.served_model_name is None:
            self.served_model_name = self.model_path
@@ -435,9 +439,8 @@ class ServerArgs:
        parser.add_argument(
            "--device",
            type=str,
-            default="cuda",
+            default=ServerArgs.device,
-            choices=["cuda", "xpu", "hpu", "cpu"],
+            help="The device to use ('cuda', 'xpu', 'hpu', 'cpu'). Defaults to auto-detection if not specified.",
-            help="The device type.",
        )
        parser.add_argument(
            "--served-model-name",