Unverified Commit b3868722 authored by Tommy Yang, committed by GitHub
Browse files

Make sglang compat with vllm 0.5.1 (#598)

parent 710f614e
......@@ -53,7 +53,11 @@ pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/
The docker images are available on Docker Hub as [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
### Common Notes
- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html).
- If you see errors from the Triton compiler, please install the [Triton Nightly](https://triton-lang.org/main/getting-started/installation.html) by running:
```
pip uninstall -y triton triton-nightly
pip install -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
```
- If you cannot install FlashInfer, check out its [installation](https://docs.flashinfer.ai/installation.html#) page. If you still cannot install it, you can use the slower Triton kernels by adding `--disable-flashinfer` when launching the server.
- If you only need to use the OpenAI backend, you can avoid installing other dependencies by using `pip install "sglang[openai]"`.
......
......@@ -21,7 +21,7 @@ dependencies = [
[project.optional-dependencies]
srt = ["aiohttp", "fastapi", "hf_transfer", "huggingface_hub", "interegular", "packaging", "pillow",
"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.0", "outlines>=0.0.44"]
"psutil", "pydantic", "rpyc", "torch", "uvicorn", "uvloop", "zmq", "vllm==0.5.1", "outlines>=0.0.44"]
openai = ["openai>=1.0", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]
......
......@@ -326,7 +326,7 @@ class ModelRunner:
device_config=device_config,
load_config=load_config,
lora_config=None,
vision_language_config=None,
multimodal_config=None,
parallel_config=None,
scheduler_config=None,
cache_config=None,
......
......@@ -476,7 +476,7 @@ def monkey_patch_vllm_dummy_weight_loader():
ModelConfig,
ParallelConfig,
SchedulerConfig,
VisionLanguageConfig,
MultiModalConfig,
_initialize_model,
initialize_dummy_weights,
nn,
......@@ -489,7 +489,7 @@ def monkey_patch_vllm_dummy_weight_loader():
model_config: ModelConfig,
device_config: DeviceConfig,
lora_config: Optional[LoRAConfig],
vision_language_config: Optional[VisionLanguageConfig],
multimodal_config: Optional[MultiModalConfig],
parallel_config: ParallelConfig,
scheduler_config: SchedulerConfig,
cache_config: CacheConfig,
......@@ -500,7 +500,7 @@ def monkey_patch_vllm_dummy_weight_loader():
model_config,
self.load_config,
lora_config,
vision_language_config,
multimodal_config,
cache_config,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment