"examples/distributed/graphsage/node_classification.py" did not exist on "37be02a486eec1448fdf43fceb9ff50be04a9cd2"
Commit 63fbef98 authored by Lianmin Zheng
Browse files

fix flashinfer & http log level

parent 2a754e57
# PyPI Package Release Process
## Update the version in code
Update the package version in `python/pyproject.toml` and `python/sglang/__init__.py`.
## Upload the PyPI package
``` ```
pip install build twine pip install build twine
``` ```
...@@ -5,4 +12,7 @@ pip install build twine ...@@ -5,4 +12,7 @@ pip install build twine
``` ```
cd python cd python
bash upload_pypi.sh bash upload_pypi.sh
``` ```
\ No newline at end of file
## Make a release in GitHub
Make a new release at https://github.com/sgl-project/sglang/releases/new.
...@@ -152,7 +152,9 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg ...@@ -152,7 +152,9 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
if server_args.disable_disk_cache: if server_args.disable_disk_cache:
disable_cache() disable_cache()
if not server_args.disable_flashinfer: if not server_args.disable_flashinfer:
assert_pkg_version("flashinfer", "0.0.8") assert_pkg_version("flashinfer", "0.0.8", "Please uninstall the old version and "
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.")
if server_args.chat_template: if server_args.chat_template:
# TODO: replace this with huggingface transformers template # TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api(server_args.chat_template) load_chat_template_for_openai_api(server_args.chat_template)
...@@ -293,7 +295,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg ...@@ -293,7 +295,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg
app, app,
host=server_args.host, host=server_args.host,
port=server_args.port, port=server_args.port,
log_level=server_args.log_level_http or server_args.log_level_http, log_level=server_args.log_level_http or server_args.log_level,
timeout_keep_alive=5, timeout_keep_alive=5,
loop="uvloop", loop="uvloop",
) )
......
...@@ -50,11 +50,11 @@ class ServerArgs: ...@@ -50,11 +50,11 @@ class ServerArgs:
load_balance_method: str = "round_robin" load_balance_method: str = "round_robin"
# Optimization/debug options # Optimization/debug options
disable_flashinfer: bool = True disable_flashinfer: bool = False
attention_reduce_in_fp32: bool = False
disable_radix_cache: bool = False disable_radix_cache: bool = False
disable_regex_jump_forward: bool = False disable_regex_jump_forward: bool = False
disable_disk_cache: bool = False disable_disk_cache: bool = False
attention_reduce_in_fp32: bool = False
# Distributed args # Distributed args
nccl_init_addr: Optional[str] = None nccl_init_addr: Optional[str] = None
......
...@@ -426,17 +426,19 @@ def suppress_other_loggers(): ...@@ -426,17 +426,19 @@ def suppress_other_loggers():
logging.getLogger("vllm.utils").setLevel(logging.WARN) logging.getLogger("vllm.utils").setLevel(logging.WARN)
def assert_pkg_version(pkg: str, min_version: str): def assert_pkg_version(pkg: str, min_version: str, message: str):
try: try:
installed_version = version(pkg) installed_version = version(pkg)
if pkg_version.parse(installed_version) < pkg_version.parse(min_version): if pkg_version.parse(installed_version) < pkg_version.parse(min_version):
raise Exception( raise Exception(
f"{pkg} is installed with version {installed_version} which " f"{pkg} is installed with version {installed_version}, which "
f"is less than the minimum required version {min_version}" f"is less than the minimum required version {min_version}. " +
message
) )
except PackageNotFoundError: except PackageNotFoundError:
raise Exception( raise Exception(
f"{pkg} with minimum required version {min_version} is not installed" f"{pkg} with minimum required version {min_version} is not installed. " +
message
) )
...@@ -588,4 +590,4 @@ def receive_addrs(model_port_args, server_args): ...@@ -588,4 +590,4 @@ def receive_addrs(model_port_args, server_args):
print(f"Node 0 received from rank {src_rank}: {tensor.tolist()}") print(f"Node 0 received from rank {src_rank}: {tensor.tolist()}")
dist.barrier() dist.barrier()
dist.destroy_process_group() dist.destroy_process_group()
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment