Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
374ee287
Unverified
Commit
374ee287
authored
Mar 19, 2025
by
Alessandro Sangiorgi
Committed by
GitHub
Mar 20, 2025
Browse files
[Frontend] Remove custom_cache_manager (#13791)
Signed-off-by:
fulvius31
<
asangior@redhat.com
>
parent
a4d83661
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1 addition
and
71 deletions
+1
-71
vllm/executor/multiproc_worker_utils.py
vllm/executor/multiproc_worker_utils.py
+0
-8
vllm/triton_utils/__init__.py
vllm/triton_utils/__init__.py
+1
-8
vllm/triton_utils/custom_cache_manager.py
vllm/triton_utils/custom_cache_manager.py
+0
-55
No files found.
vllm/executor/multiproc_worker_utils.py
View file @
374ee287
...
@@ -16,12 +16,8 @@ import torch
...
@@ -16,12 +16,8 @@ import torch
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.triton_utils.importing
import
HAS_TRITON
from
vllm.utils
import
_check_multiproc_method
,
get_mp_context
,
run_method
from
vllm.utils
import
_check_multiproc_method
,
get_mp_context
,
run_method
if
HAS_TRITON
:
from
vllm.triton_utils
import
maybe_set_triton_cache_manager
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
T
=
TypeVar
(
'T'
)
T
=
TypeVar
(
'T'
)
...
@@ -314,7 +310,3 @@ def set_multiprocessing_worker_envs(parallel_config):
...
@@ -314,7 +310,3 @@ def set_multiprocessing_worker_envs(parallel_config):
current_parallelism
,
default_omp_num_threads
)
current_parallelism
,
default_omp_num_threads
)
os
.
environ
[
"OMP_NUM_THREADS"
]
=
str
(
default_omp_num_threads
)
os
.
environ
[
"OMP_NUM_THREADS"
]
=
str
(
default_omp_num_threads
)
torch
.
set_num_threads
(
default_omp_num_threads
)
torch
.
set_num_threads
(
default_omp_num_threads
)
# workaround for https://github.com/vllm-project/vllm/issues/6103
if
HAS_TRITON
and
parallel_config
.
world_size
>
1
:
maybe_set_triton_cache_manager
()
vllm/triton_utils/__init__.py
View file @
374ee287
...
@@ -3,10 +3,3 @@
...
@@ -3,10 +3,3 @@
from
vllm.triton_utils.importing
import
HAS_TRITON
from
vllm.triton_utils.importing
import
HAS_TRITON
__all__
=
[
"HAS_TRITON"
]
__all__
=
[
"HAS_TRITON"
]
if
HAS_TRITON
:
from
vllm.triton_utils.custom_cache_manager
import
(
maybe_set_triton_cache_manager
)
__all__
+=
[
"maybe_set_triton_cache_manager"
]
vllm/triton_utils/custom_cache_manager.py
deleted
100644 → 0
View file @
a4d83661
# SPDX-License-Identifier: Apache-2.0
import
os
from
triton.runtime.cache
import
(
FileCacheManager
,
default_cache_dir
,
default_dump_dir
,
default_override_dir
)
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
def maybe_set_triton_cache_manager() -> None:
    """Select vLLM's custom Triton cache manager if none is configured.

    When the ``TRITON_CACHE_MANAGER`` environment variable is already
    present, it is left untouched. Otherwise it is set to the import path
    of ``CustomCacheManager`` and the choice is logged.
    """
    # Respect any cache manager that is already configured.
    if os.environ.get("TRITON_CACHE_MANAGER") is not None:
        return

    manager = "vllm.triton_utils.custom_cache_manager:CustomCacheManager"
    logger.info("Setting Triton cache manager to: %s", manager)
    os.environ["TRITON_CACHE_MANAGER"] = manager
class CustomCacheManager(FileCacheManager):
    """Triton file cache manager that uses a per-process cache directory.

    This re-implements the constructor of Triton's cache manager so that
    the default cache directory is suffixed with the current process id.
    That avoids collisions when running with tp>1 and using
    multi-processing as the distributed backend.

    Note this issue was fixed by triton-lang/triton/pull/4295, but the
    fix is not yet included in triton==v3.0.0. However, it should be
    included in the subsequent version.
    """

    def __init__(self, key, override=False, dump=False):
        """Resolve the cache directory for ``key``.

        Args:
            key: Name of the sub-directory appended to the chosen root.
            override: When truthy (and ``dump`` is falsy), use the
                override directory as the root.
            dump: When truthy, use the dump directory as the root; takes
                precedence over ``override``.

        Raises:
            RuntimeError: If neither ``TRITON_CACHE_DIR`` nor the default
                cache directory yields a non-empty path.
        """
        self.key = key
        self.lock_path = None

        if override and not dump:
            # The override directory is only located here: no lock path
            # is set and the directory is not created.
            self.cache_dir = os.path.join(default_override_dir(), self.key)
            return

        if dump:
            base_dir = default_dump_dir()
        else:
            # Prefer a non-blank TRITON_CACHE_DIR, else Triton's default.
            base_dir = (os.getenv("TRITON_CACHE_DIR", "").strip()
                        or default_cache_dir())
            if not base_dir:
                raise RuntimeError("Could not create or locate cache dir")
            # Suffix with the pid so each process gets its own directory.
            base_dir = f"{base_dir}_{os.getpid()}"

        # create cache directory if it doesn't exist
        self.cache_dir = os.path.join(base_dir, self.key)
        self.lock_path = os.path.join(self.cache_dir, "lock")
        os.makedirs(self.cache_dir, exist_ok=True)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment