fix flashinfer & http log level

63fbef98 · Lianmin Zheng · 2a754e57 · 63fbef98 · 63fbef98 · 63fbef98
Commit 63fbef98 authored Jul 03, 2024 by Lianmin Zheng
4 changed files
--- a/docs/release_process.md
+++ b/docs/release_process.md
+# PyPI Package Release Process
+## Update the version in code
+Update the package version in `python/pyproject.toml` and `python/sglang/__init__.py`.
+## Upload the PyPI package
 ```
 pip install build twine
 ```
@@ -5,4 +12,7 @@ pip install build twine
 ```
 cd python
 bash upload_pypi.sh
 ```
\ No newline at end of file
+## Make a release in GitHub
+Make a new release at https://github.com/sgl-project/sglang/releases/new.
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -152,7 +152,9 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
    if server_args.disable_disk_cache:
        disable_cache()
    if not server_args.disable_flashinfer:
-        assert_pkg_version("flashinfer", "0.0.8")
+        assert_pkg_version("flashinfer", "0.0.8", "Please uninstall the old version and "
+                           "reinstall the latest version by following the instructions "
+                           "at https://docs.flashinfer.ai/installation.html.")
    if server_args.chat_template:
        # TODO: replace this with huggingface transformers template
        load_chat_template_for_openai_api(server_args.chat_template)
@@ -293,7 +295,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
            app,
            host=server_args.host,
            port=server_args.port,
-            log_level=server_args.log_level_http or server_args.log_level_http,
+            log_level=server_args.log_level_http or server_args.log_level,
            timeout_keep_alive=5,
            loop="uvloop",
        )

--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -50,11 +50,11 @@ class ServerArgs:
    load_balance_method: str = "round_robin"
    # Optimization/debug options
-    disable_flashinfer: bool = True
+    disable_flashinfer: bool = False
-    attention_reduce_in_fp32: bool = False
    disable_radix_cache: bool = False
    disable_regex_jump_forward: bool = False
    disable_disk_cache: bool = False
+    attention_reduce_in_fp32: bool = False
    # Distributed args
    nccl_init_addr: Optional[str] = None

--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -426,17 +426,19 @@ def suppress_other_loggers():
    logging.getLogger("vllm.utils").setLevel(logging.WARN)
-def assert_pkg_version(pkg: str, min_version: str):
+def assert_pkg_version(pkg: str, min_version: str, message: str):
    try:
        installed_version = version(pkg)
        if pkg_version.parse(installed_version) < pkg_version.parse(min_version):
            raise Exception(
-                f"{pkg} is installed with version {installed_version} which "
+                f"{pkg} is installed with version {installed_version}, which "
-                f"is less than the minimum required version {min_version}"
+                f"is less than the minimum required version {min_version}. " +
+                message
            )
    except PackageNotFoundError:
        raise Exception(
-            f"{pkg} with minimum required version {min_version} is not installed"
+            f"{pkg} with minimum required version {min_version} is not installed. " +
+            message
        )
@@ -588,4 +590,4 @@ def receive_addrs(model_port_args, server_args):
        print(f"Node 0 received from rank {src_rank}: {tensor.tolist()}")
    dist.barrier()
    dist.destroy_process_group() 
\ No newline at end of file