"vscode:/vscode.git/clone" did not exist on "aaef41b5fe06535fe6dd0caa723386c0e7494cec"
Commit 565d7274 authored by Lianmin Zheng

improve logging & fix vllm version

parent 09de730d
@@ -20,7 +20,7 @@ dependencies = [
 [project.optional-dependencies]
 srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
-       "zmq", "vllm>=0.4.2", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
+       "zmq", "vllm==0.4.2", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
 openai = ["openai>=1.0", "numpy", "tiktoken"]
 anthropic = ["anthropic>=0.20.0", "numpy"]
 all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]"]
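This dependency change is the "fix vllm version" half of the commit: the old specifier vllm>=0.4.2 lets pip resolve to any newer vllm release, while the new vllm==0.4.2 pins it exactly. A minimal sketch using the packaging library (already listed in the srt extra) illustrates the difference; the version numbers checked here are illustrative only and are not releases mentioned in the commit.

from packaging.specifiers import SpecifierSet

# ">=0.4.2" admits any later release, e.g. a hypothetical 0.4.3.
assert SpecifierSet(">=0.4.2").contains("0.4.3")

# "==0.4.2" pins the dependency: only 0.4.2 itself satisfies it.
assert SpecifierSet("==0.4.2").contains("0.4.2")
assert not SpecifierSet("==0.4.2").contains("0.4.3")

Pinning trades automatic upgrades for reproducibility: a later vllm release cannot be pulled in implicitly by "pip install sglang[srt]".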
@@ -235,6 +235,7 @@ class ModelRunner:
         }
 
         # Init torch distributed
+        logger.info(f"[rank={self.tp_rank}] Set cuda device.")
         torch.cuda.set_device(self.tp_rank)
         logger.info(f"[rank={self.tp_rank}] Init torch begin. Avail mem={get_available_gpu_memory(self.tp_rank):.2f} GB")
         torch.distributed.init_process_group(
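The added logger.info call is the "improve logging" half: it records which CUDA device each tensor-parallel rank is about to select, so a hang in torch.cuda.set_device or in the subsequent distributed init can be attributed to a specific rank. Below is a minimal, self-contained sketch of that per-rank logging pattern. The function boundary, the nccl_port/TCP init method, and the body of get_available_gpu_memory are assumptions made for illustration; only the logged messages and the two torch calls mirror the diff.

import logging

import torch

logger = logging.getLogger(__name__)


def get_available_gpu_memory(device_id: int) -> float:
    # Assumed behavior of the helper referenced in the diff: free GPU memory in GB.
    free_bytes, _total_bytes = torch.cuda.mem_get_info(device_id)
    return free_bytes / (1 << 30)


def init_torch_distributed(tp_rank: int, tp_size: int, nccl_port: int = 28765):
    # Hypothetical wrapper; tp_rank is assumed to double as the local CUDA device index.
    logger.info(f"[rank={tp_rank}] Set cuda device.")
    torch.cuda.set_device(tp_rank)

    # Log free memory before init_process_group, since NCCL allocates buffers on each rank.
    logger.info(
        f"[rank={tp_rank}] Init torch begin. "
        f"Avail mem={get_available_gpu_memory(tp_rank):.2f} GB"
    )
    torch.distributed.init_process_group(
        backend="nccl",
        world_size=tp_size,
        rank=tp_rank,
        init_method=f"tcp://127.0.0.1:{nccl_port}",
    )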