release v0.1.10

a49dc52b · Lianmin Zheng · 873d0e85 · a49dc52b · a49dc52b · a49dc52b
Commit a49dc52b authored Jan 30, 2024 by Lianmin Zheng
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 2 deletions

README.md README.md +1 -0

python/pyproject.toml python/pyproject.toml +1 -1

python/sglang/__init__.py python/sglang/__init__.py +1 -1

No files found.
--- a/README.md
+++ b/README.md
@@ -351,6 +351,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
 ```
 python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7
 ```
+- You can turn on [flashinfer](docs/flashinfer.md) to accelerate inference by using highly optimized CUDA kernels.

 ### Supported Models
 - Llama

--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "sglang"
-version = "0.1.9"
+version = "0.1.10"
 description = "A structured generation language for LLMs."
 readme = "README.md"
 requires-python = ">=3.8"

--- a/python/sglang/__init__.py
+++ b/python/sglang/__init__.py
-__version__ = "0.1.9"
+__version__ = "0.1.10"

 from sglang.api import *
 from sglang.global_config import global_config