sglang / Commits / d93388da

Commit d93388da (unverified), authored Jul 18, 2024 by zhyncs, committed by GitHub Jul 17, 2024

feat: add check_env (#645)

Parent: 476584cb

Showing 5 changed files with 181 additions and 6 deletions (+181 -6)
.github/ISSUE_TEMPLATE/1-bug-report.yml                 +2   -2
python/sglang/check_env.py                              +163 -0
python/sglang/srt/managers/controller/infer_batch.py    +4   -2
python/sglang/srt/managers/controller/model_runner.py   +4   -1
python/sglang/srt/memory_pool.py                        +8   -1
.github/ISSUE_TEMPLATE/1-bug-report.yml

@@ -20,7 +20,7 @@ body:
   attributes:
     label: Reproduction
     description: |
-      1. What command or script did you run?
+      What command or script did you run?
     placeholder: |
       A placeholder for the command.
   validations:
@@ -29,7 +29,7 @@ body:
   attributes:
     label: Environment
     description: |
-      Please provide necessary environment information here.
+      Please provide necessary environment information here with `python3 -m sglang.check_env`.
     placeholder: Environment here.
     render: Shell
   validations:
python/sglang/check_env.py (new file, mode 100644)

import importlib
import os
import subprocess
import sys
from collections import OrderedDict, defaultdict

import torch

# List of packages to check versions for
PACKAGE_LIST = [
    "sglang",
    "flashinfer",
    "aiohttp",
    "fastapi",
    "hf_transfer",
    "huggingface_hub",
    "interegular",
    "packaging",
    "pillow",
    "psutil",
    "pydantic",
    "rpyc",
    "uvicorn",
    "uvloop",
    "zmq",
    "vllm",
    "outlines",
    "openai",
    "tiktoken",
    "anthropic",
    "litellm",
]


def get_package_versions(packages):
    """
    Get versions of specified packages.
    """
    versions = {}
    for package in packages:
        package_name = package.split("==")[0].split(">=")[0].split("<=")[0]
        try:
            module = importlib.import_module(package_name)
            if hasattr(module, "__version__"):
                versions[package_name] = module.__version__
        except ModuleNotFoundError:
            versions[package_name] = "Module Not Found"
    return versions


def get_cuda_info():
    """
    Get CUDA-related information if available.
    """
    cuda_info = {"CUDA available": torch.cuda.is_available()}

    if cuda_info["CUDA available"]:
        cuda_info.update(_get_gpu_info())
        cuda_info.update(_get_cuda_version_info())

    return cuda_info


def _get_gpu_info():
    """
    Get information about available GPUs.
    """
    devices = defaultdict(list)
    for k in range(torch.cuda.device_count()):
        devices[torch.cuda.get_device_name(k)].append(str(k))

    return {f"GPU {','.join(device_ids)}": name for name, device_ids in devices.items()}


def _get_cuda_version_info():
    """
    Get CUDA version information.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    cuda_info = {"CUDA_HOME": CUDA_HOME}

    if CUDA_HOME and os.path.isdir(CUDA_HOME):
        cuda_info.update(_get_nvcc_info())
        cuda_info.update(_get_cuda_driver_version())

    return cuda_info


def _get_nvcc_info():
    """
    Get NVCC version information.
    """
    from torch.utils.cpp_extension import CUDA_HOME

    try:
        nvcc = os.path.join(CUDA_HOME, "bin/nvcc")
        nvcc_output = (
            subprocess.check_output(f'"{nvcc}" -V', shell=True).decode("utf-8").strip()
        )
        return {
            "NVCC": nvcc_output[
                nvcc_output.rfind("Cuda compilation tools") : nvcc_output.rfind("Build")
            ].strip()
        }
    except subprocess.SubprocessError:
        return {"NVCC": "Not Available"}


def _get_cuda_driver_version():
    """
    Get CUDA driver version.
    """
    try:
        output = subprocess.check_output(
            [
                "nvidia-smi",
                "--query-gpu=driver_version",
                "--format=csv,noheader,nounits",
            ]
        )
        return {"CUDA Driver Version": output.decode().strip()}
    except subprocess.SubprocessError:
        return {"CUDA Driver Version": "Not Available"}


def get_gpu_topology():
    """
    Get GPU topology information.
    """
    try:
        result = subprocess.run(
            ["nvidia-smi", "topo", "-m"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
        return "\n" + result.stdout if result.returncode == 0 else None
    except subprocess.SubprocessError:
        return None


def check_env():
    """
    Check and print environment information.
    """
    env_info = OrderedDict()
    env_info["Python"] = sys.version.replace("\n", "")
    env_info.update(get_cuda_info())
    env_info["PyTorch"] = torch.__version__
    env_info.update(get_package_versions(PACKAGE_LIST))

    gpu_topo = get_gpu_topology()
    if gpu_topo:
        env_info["NVIDIA Topology"] = gpu_topo

    for k, v in env_info.items():
        print(f"{k}: {v}")


if __name__ == "__main__":
    check_env()
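For reference, `check_env` prints one `key: value` line per entry, in the order assembled above. A sketch of typical output, with purely illustrative values (actual versions, paths, and GPU names depend on the host):

$ python3 -m sglang.check_env
Python: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
CUDA available: True
GPU 0: NVIDIA A100-SXM4-80GB
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 12.1, V12.1.105
CUDA Driver Version: 535.104.05
PyTorch: 2.3.0+cu121
sglang: 0.1.20
flashinfer: 0.0.8
...

If `nvidia-smi topo -m` succeeds, an `NVIDIA Topology` entry carrying the raw topology matrix is appended at the end.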
python/sglang/srt/managers/controller/infer_batch.py

@@ -327,8 +327,10 @@ class Batch:
         req_pool_indices = self.req_to_token_pool.alloc(bs)
         if req_pool_indices is None:
-            raise RuntimeError("Out of memory. "
-                "Please set a smaller number for `--max-running-requests`.")
+            raise RuntimeError(
+                "Out of memory. "
+                "Please set a smaller number for `--max-running-requests`."
+            )
         req_pool_indices_cpu = req_pool_indices.cpu().numpy()
         for i in range(bs):
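The reworded error points users at the launch flag that caps concurrent requests. A hypothetical invocation for illustration only (the model path and limit below are made up, not part of this commit):

python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-hf --max-running-requests 32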
python/sglang/srt/managers/controller/model_runner.py

@@ -168,7 +168,10 @@ class ModelRunner:
         )
         self.req_to_token_pool = ReqToTokenPool(
-            max(int(self.max_total_num_tokens / self.model_config.context_len * 512), 2048),
+            max(
+                int(self.max_total_num_tokens / self.model_config.context_len * 512),
+                2048,
+            ),
             self.model_config.context_len + 8,
         )
         self.token_to_kv_pool = TokenToKVPool(
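The sizing heuristic itself is unchanged; only its formatting differs. As a worked example, assuming hypothetical values for the two inputs:

# Hypothetical inputs, for illustration only; real values come from the model config.
max_total_num_tokens = 200_000
context_len = 4096

# Same formula as in the diff: scale the token budget by 512 / context_len,
# but never size the request pool below 2048 slots.
pool_size = max(int(max_total_num_tokens / context_len * 512), 2048)
print(pool_size)  # 25000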
python/sglang/srt/memory_pool.py

@@ -44,7 +44,14 @@ class ReqToTokenPool:
 class TokenToKVPool:
     """A memory pool that maps a token to its kv cache locations"""

-    def __init__(self, size: int, dtype: torch.dtype, head_num: int, head_dim: int, layer_num: int):
+    def __init__(
+        self,
+        size: int,
+        dtype: torch.dtype,
+        head_num: int,
+        head_dim: int,
+        layer_num: int,
+    ):
         self.size = size
         # We also add one slot. This slot is used for writing dummy output from padded tokens.
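A minimal construction sketch for the reformatted signature; every value below is hypothetical (in the codebase they are derived inside ModelRunner):

import torch

from sglang.srt.memory_pool import TokenToKVPool

pool = TokenToKVPool(
    size=25000,           # hypothetical token capacity
    dtype=torch.float16,  # KV-cache dtype
    head_num=32,          # hypothetical KV heads per layer
    head_dim=128,         # hypothetical head dimension
    layer_num=32,         # hypothetical number of layers
)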