Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
04ec6ba2
"docs/vscode:/vscode.git/clone" did not exist on "0521a0c2fa6e64b63d46e1840f9fdbaec989704e"
Unverified
Commit
04ec6ba2
authored
Jul 25, 2024
by
Liangsheng Yin
Committed by
GitHub
Jul 25, 2024
Browse files
Fix dockerfile and triton cache manager (#720)
parent
d63f13c1
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
50 additions
and
9 deletions
+50
-9
docker/Dockerfile
docker/Dockerfile
+0
-8
python/sglang/srt/server.py
python/sglang/srt/server.py
+6
-0
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+44
-1
No files found.
docker/Dockerfile
View file @
04ec6ba2
...
...
@@ -23,18 +23,10 @@ RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
RUN
apt-get update
-y
\
&&
apt-get
install
-y
python3-pip git curl
sudo
# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
# this won't be needed for future versions of this docker image
# or future versions of triton.
RUN
ldconfig /usr/local/cuda-
$(
echo
$CUDA_VERSION
|
cut
-d
.
-f1
,2
)
/compat/
WORKDIR
/sgl-workspace
RUN
pip3
--no-cache-dir
install
--upgrade
pip
\
&&
pip3
--no-cache-dir
install
"sglang[all]"
\
&&
pip3
--no-cache-dir
uninstall
-y
triton triton-nightly
\
&&
pip3
--no-cache-dir
install
--no-deps
--index-url
https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
\
&&
pip3
--no-cache-dir
install
flashinfer
-i
https://flashinfer.ai/whl/cu121/torch2.3/
ENV
DEBIAN_FRONTEND=interactive
python/sglang/srt/server.py
View file @
04ec6ba2
...
...
@@ -52,6 +52,7 @@ from sglang.srt.utils import (
allocate_init_ports
,
assert_pkg_version
,
enable_show_time_cost
,
maybe_set_triton_cache_manager
,
set_ulimit
,
)
from
sglang.utils
import
get_exception_traceback
...
...
@@ -201,6 +202,11 @@ def launch_server(
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html."
,
)
if
server_args
.
tp_size
//
server_args
.
dp_size
>
1
:
# FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
maybe_set_triton_cache_manager
()
if
server_args
.
chat_template
:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api
(
server_args
.
chat_template
)
...
...
python/sglang/srt/utils.py
View file @
04ec6ba2
...
...
@@ -18,10 +18,15 @@ import psutil
import
requests
import
torch
import
torch.distributed
as
dist
import
triton
from
fastapi.responses
import
JSONResponse
from
packaging
import
version
as
pkg_version
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
triton.runtime.cache
import
(
FileCacheManager
,
default_cache_dir
,
default_dump_dir
,
default_override_dir
,
)
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -460,6 +465,44 @@ def monkey_patch_vllm_all_gather(reverse: bool = False):
setattr
(
GroupCoordinator
,
"all_gather"
,
all_gather
)
def maybe_set_triton_cache_manager() -> None:
    """Select sglang's custom Triton cache manager unless one is already set.

    Reads the ``TRITON_CACHE_MANAGER`` environment variable. If it is
    already present, it is left untouched; otherwise it is set to
    ``sglang.srt.utils:CustomCacheManager`` and the choice is logged.
    """
    # Respect any cache manager that has already been configured.
    if "TRITON_CACHE_MANAGER" in os.environ:
        return

    target = "sglang.srt.utils:CustomCacheManager"
    logger.info("Setting Triton cache manager to: %s", target)
    os.environ["TRITON_CACHE_MANAGER"] = target
class CustomCacheManager(FileCacheManager):
    """File cache manager whose default cache directory is suffixed with the PID.

    Only ``__init__`` is overridden; it sets ``key``, ``cache_dir`` and
    ``lock_path`` itself and does not call the parent initializer.

    NOTE(review): the per-PID suffix presumably keeps concurrently running
    worker processes from sharing one Triton cache directory -- confirm
    against the upstream change this was adapted from.
    """

    # Adapted from: https://github.com/tdoublep/vllm/blob/3307522289fdfefe323b6c00d0db696651989a2f/vllm/triton_utils/custom_cache_manager.py
    def __init__(self, key, override=False, dump=False):
        """Resolve the cache directory for ``key``.

        Args:
            key: Sub-directory name appended to the selected base directory.
            override: When true (and ``dump`` is false), use the override
                directory; no lock path is set and nothing is created.
            dump: When true, use the dump directory; takes precedence over
                ``override``.

        Raises:
            RuntimeError: If neither ``TRITON_CACHE_DIR`` nor the default
                cache directory yields a non-empty path.
        """
        self.key = key
        self.lock_path = None

        if dump:
            self.cache_dir = os.path.join(default_dump_dir(), self.key)
            self.lock_path = os.path.join(self.cache_dir, "lock")
            os.makedirs(self.cache_dir, exist_ok=True)
            return

        if override:
            self.cache_dir = os.path.join(default_override_dir(), self.key)
            return

        # Default location: the environment variable wins, then Triton's default.
        self.cache_dir = (
            os.getenv("TRITON_CACHE_DIR", "").strip() or default_cache_dir()
        )
        if not self.cache_dir:
            raise RuntimeError("Could not create or locate cache dir")

        # Append the current PID to the base directory, then the key.
        self.cache_dir = os.path.join(f"{self.cache_dir}_{os.getpid()}", self.key)
        self.lock_path = os.path.join(self.cache_dir, "lock")
        # create cache directory if it doesn't exist
        os.makedirs(self.cache_dir, exist_ok=True)
API_KEY_HEADER_NAME
=
"X-API-Key"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment