Make sglang compatible with vllm 0.5.1 (#598)

b3868722 · Tommy Yang · GitHub · 710f614e · b3868722 · b3868722
Unverified commit b3868722, authored Jul 09, 2024 by Tommy Yang; committed by GitHub Jul 08, 2024
4 changed files
--- a/README.md
+++ b/README.md
@@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
 The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).

 ### Common Notes
- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html).
+- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by running:
+```
+pip uninstall -y triton triton-nightly
+pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
+```
 - If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
 - If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.


--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [

 [project.optional-dependencies]
 srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow",
-       "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"]
+       "psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"]
 openai = ["openai>=1.0", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]

--- a/python/sglang/srt/managers/controller/model_runner.py
+++ b/python/sglang/srt/managers/controller/model_runner.py
@@ -326,7 +326,7 @@ class ModelRunner:
            device_config=device_config,
            load_config=load_config,
            lora_config=None,
-            vision_language_config=None,
+            multimodal_config=None,
            parallel_config=None,
            scheduler_config=None,
            cache_config=None,

--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -476,7 +476,7 @@ def monkey_patch_vllm_dummy_weight_loader():
        ModelConfig,
        ParallelConfig,
        SchedulerConfig,
-        VisionLanguageConfig,
+        MultiModalConfig,
        _initialize_model,
        initialize_dummy_weights,
        nn,
@@ -489,7 +489,7 @@ def monkey_patch_vllm_dummy_weight_loader():
        model_config: ModelConfig,
        device_config: DeviceConfig,
        lora_config: Optional[LoRAConfig],
-        vision_language_config: Optional[VisionLanguageConfig],
+        multimodal_config: Optional[MultiModalConfig],
        parallel_config: ParallelConfig,
        scheduler_config: SchedulerConfig,
        cache_config: CacheConfig,
@@ -500,7 +500,7 @@ def monkey_patch_vllm_dummy_weight_loader():
                    model_config,
                    self.load_config,
                    lora_config,
-                    vision_language_config,
+                    multimodal_config,
                    cache_config,
                )