Unverified Commit d93388da authored by zhyncs, committed by GitHub

feat: add check_env (#645)

parent 476584cb
@@ -20,7 +20,7 @@ body:
     attributes:
       label: Reproduction
       description: |
-        1. What command or script did you run?
+        What command or script did you run?
       placeholder: |
         A placeholder for the command.
     validations:
@@ -29,7 +29,7 @@ body:
     attributes:
       label: Environment
       description: |
-        Please provide necessary environment information here.
+        Please provide necessary environment information here with `python3 -m sglang.check_env`.
      placeholder: Environment here.
      render: Shell
    validations:
......
import importlib
import os
import subprocess
import sys
from collections import OrderedDict, defaultdict

import torch

# List of packages to check versions for
PACKAGE_LIST = [
    "sglang",
    "flashinfer",
    "aiohttp",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "packaging",
    "pillow",
    "psutil",
    "pydantic",
    "rpyc",
    "uvicorn",
    "uvloop",
    "zmq",
    "vllm",
    "outlines",
    "openai",
    "tiktoken",
    "anthropic",
    "litellm",
]


def get_package_versions(packages):
    """
    Get versions of specified packages.
    """
    versions = {}
    for package in packages:
        package_name = package.split("==")[0].split(">=")[0].split("<=")[0]
        try:
            module = importlib.import_module(package_name)
            if hasattr(module, "__version__"):
                versions[package_name] = module.__version__
        except ModuleNotFoundError:
            versions[package_name] = "Module Not Found"
    return versions


def get_cuda_info():
    """
    Get CUDA-related information if available.
    """
    cuda_info = {"CUDA available": torch.cuda.is_available()}

    if cuda_info["CUDA available"]:
        cuda_info.update(_get_gpu_info())
        cuda_info.update(_get_cuda_version_info())

    return cuda_info


def _get_gpu_info():
    """
    Get information about available GPUs.
    """
    devices = defaultdict(list)
    for k in range(torch.cuda.device_count()):
        devices[torch.cuda.get_device_name(k)].append(str(k))
    return {f"GPU {','.join(device_ids)}": name for name, device_ids in devices.items()}


def _get_cuda_version_info():
    """
    Get CUDA version information.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    cuda_info = {"CUDA_HOME": CUDA_HOME}

    if CUDA_HOME and os.path.isdir(CUDA_HOME):
        cuda_info.update(_get_nvcc_info())
        cuda_info.update(_get_cuda_driver_version())

    return cuda_info


def _get_nvcc_info():
    """
    Get NVCC version information.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    try:
        nvcc = os.path.join(CUDA_HOME, "bin/nvcc")
        nvcc_output = (
            subprocess.check_output(f'"{nvcc}" -V', shell=True).decode("utf-8").strip()
        )
        return {
            "NVCC": nvcc_output[
                nvcc_output.rfind("Cuda compilation tools") : nvcc_output.rfind("Build")
            ].strip()
        }
    except subprocess.SubprocessError:
        return {"NVCC": "Not Available"}


def _get_cuda_driver_version():
    """
    Get CUDA driver version.
    """
    try:
        output = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=driver_version",
                "--format=csv,noheader,nounits",
            ]
        )
        return {"CUDA Driver Version": output.decode().strip()}
    except subprocess.SubprocessError:
        return {"CUDA Driver Version": "Not Available"}


def get_gpu_topology():
    """
    Get GPU topology information.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "topo", "-m"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
        return "\n" + result.stdout if result.returncode == 0 else None
    except subprocess.SubprocessError:
        return None


def check_env():
    """
    Check and print environment information.
    """
    env_info = OrderedDict()
    env_info["Python"] = sys.version.replace("\n", "")
    env_info.update(get_cuda_info())
    env_info["PyTorch"] = torch.__version__
    env_info.update(get_package_versions(PACKAGE_LIST))

    gpu_topo = get_gpu_topology()
    if gpu_topo:
        env_info["NVIDIA Topology"] = gpu_topo

    for k, v in env_info.items():
        print(f"{k}: {v}")


if __name__ == "__main__":
    check_env()
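
With this module in place, the environment block requested by the updated issue template comes from running `python3 -m sglang.check_env`. The same report can also be produced programmatically; a minimal sketch, assuming the module is importable as `sglang.check_env` (which the `-m` invocation above implies):

# Minimal usage sketch; assumes the module is importable as
# sglang.check_env, matching `python3 -m sglang.check_env`.
from sglang.check_env import check_env

# Prints one "key: value" line per entry: Python and PyTorch versions,
# CUDA availability, driver and NVCC info, versions of everything in
# PACKAGE_LIST, and the `nvidia-smi topo -m` matrix when GPUs are present.
check_env()
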
@@ -327,8 +327,10 @@ class Batch:
         req_pool_indices = self.req_to_token_pool.alloc(bs)
         if req_pool_indices is None:
-            raise RuntimeError("Out of memory. "
-                               "Please set a smaller number for `--max-running-requests`.")
+            raise RuntimeError(
+                "Out of memory. "
+                "Please set a smaller number for `--max-running-requests`."
+            )
         req_pool_indices_cpu = req_pool_indices.cpu().numpy()
         for i in range(bs):
......
@@ -168,7 +168,10 @@ class ModelRunner:
         )
         self.req_to_token_pool = ReqToTokenPool(
-            max(int(self.max_total_num_tokens / self.model_config.context_len * 512), 2048),
+            max(
+                int(self.max_total_num_tokens / self.model_config.context_len * 512),
+                2048,
+            ),
             self.model_config.context_len + 8,
         )
         self.token_to_kv_pool = TokenToKVPool(
......
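
The reformatted call also makes the sizing rule easier to read: the request pool gets roughly 512 slots per full-context-length request that fits in the token budget, with a floor of 2048. A worked example with hypothetical inputs (the numbers below are illustrative, not values from this commit):

# Worked example of the ReqToTokenPool sizing expression above.
# Both inputs are hypothetical, chosen only for illustration.
max_total_num_tokens = 200_000  # assumed total KV-cache token budget
context_len = 4096              # assumed model context length

pool_size = max(
    int(max_total_num_tokens / context_len * 512),
    2048,
)
print(pool_size)  # 25000 -- 200_000 / 4096 * 512, well above the 2048 floor
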
@@ -44,7 +44,14 @@ class ReqToTokenPool:
 class TokenToKVPool:
     """A memory pool that maps a token to its kv cache locations"""

-    def __init__(self, size: int, dtype: torch.dtype, head_num: int, head_dim: int, layer_num: int):
+    def __init__(
+        self,
+        size: int,
+        dtype: torch.dtype,
+        head_num: int,
+        head_dim: int,
+        layer_num: int,
+    ):
         self.size = size

         # We also add one slot. This slot is used for writing dummy output from padded tokens.
......
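
The expanded signature spells out each dimension of the KV cache. A hypothetical call site with made-up model dimensions shows how the parameters read; the import path and every size here are assumptions for illustration, not values from this commit:

import torch

# Hypothetical call site; the import path and all sizes below are assumed.
from sglang.srt.memory_pool import TokenToKVPool

kv_pool = TokenToKVPool(
    size=65536,           # token slots in the pool, one KV entry per token
    dtype=torch.float16,  # storage dtype for cached keys and values
    head_num=32,          # attention heads per layer
    head_dim=128,         # dimension of each head
    layer_num=32,         # transformer layers whose KV is cached
)
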