Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3ddcf460
Unverified
Commit
3ddcf460
authored
Nov 17, 2025
by
Wentao Ye
Committed by
GitHub
Nov 17, 2025
Browse files
[Refactor] Remove Unused Func in Batch Invariant (#28881)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
d0a73620
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
73 deletions
+0
-73
vllm/model_executor/layers/batch_invariant.py
vllm/model_executor/layers/batch_invariant.py
+0
-73
No files found.
vllm/model_executor/layers/batch_invariant.py
View file @
3ddcf460
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
contextlib
import
os
from
collections
import
namedtuple
from
collections.abc
import
Callable
from
functools
import
cache
from
typing
import
Any
...
...
@@ -725,10 +723,6 @@ _original_cublas_workspace_cfg = None
_original_cublaslt_workspace_size
=
None
def is_batch_invariant_mode_enabled():
    """Return the current value of the module-level ``_batch_invariant_MODE`` flag."""
    return _batch_invariant_MODE
def
enable_batch_invariant_mode
():
global
_batch_invariant_MODE
,
_batch_invariant_LIB
,
_original_torch_bmm
global
_original_fp16_reduction_precision
,
_original_bf16_reduction_precision
...
...
@@ -791,73 +785,6 @@ def enable_batch_invariant_mode():
torch
.
backends
.
cuda
.
preferred_blas_library
(
backend
=
"cublaslt"
)
def disable_batch_invariant_mode():
    """Turn batch-invariant mode off and put back the settings saved earlier.

    Does nothing when ``_batch_invariant_MODE`` is already falsy. Otherwise it:

    * calls ``_destroy()`` on ``_batch_invariant_LIB`` if one is set;
    * reassigns ``torch.bmm`` and the bf16/fp16 reduced-precision-reduction
      flags from their saved ``_original_*`` values, when those are not None;
    * sets the preferred BLAS library backend to ``"default"``;
    * on torch versions for which ``is_torch_equal_or_newer("2.10.0.dev")``
      is false, restores or removes the ``CUBLAS_WORKSPACE_CONFIG`` and
      ``CUBLASLT_WORKSPACE_SIZE`` environment variables;
    * resets ``_batch_invariant_MODE`` to False and ``_batch_invariant_LIB``
      to None.
    """
    global _batch_invariant_MODE, _batch_invariant_LIB, _original_torch_bmm
    global _original_fp16_reduction_precision, _original_bf16_reduction_precision
    global _original_cublas_workspace_cfg, _original_cublaslt_workspace_size
    # Nothing to undo if the mode is not currently on.
    if not _batch_invariant_MODE:
        return

    if _batch_invariant_LIB is not None:
        _batch_invariant_LIB._destroy()
    # Each saved value is cleared after use so it is only applied once.
    if _original_torch_bmm is not None:
        torch.bmm = _original_torch_bmm
        _original_torch_bmm = None

    if _original_bf16_reduction_precision is not None:
        torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = (
            _original_bf16_reduction_precision
        )
        _original_bf16_reduction_precision = None
    if _original_fp16_reduction_precision is not None:
        torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = (
            _original_fp16_reduction_precision
        )
        _original_fp16_reduction_precision = None

    torch.backends.cuda.preferred_blas_library(backend="default")

    if not is_torch_equal_or_newer("2.10.0.dev"):
        # Set cublas env vars to previous results. If previous results are None,
        # that means the env vars were not set, so we should remove them.
        # NOTE(review): the checks below are truthiness checks, so a saved
        # empty string is also treated as "not set" and the variable is removed.
        if _original_cublas_workspace_cfg:
            os.environ["CUBLAS_WORKSPACE_CONFIG"] = _original_cublas_workspace_cfg
        elif "CUBLAS_WORKSPACE_CONFIG" in os.environ:
            del os.environ["CUBLAS_WORKSPACE_CONFIG"]

        if _original_cublaslt_workspace_size:
            os.environ["CUBLASLT_WORKSPACE_SIZE"] = _original_cublaslt_workspace_size
        elif "CUBLASLT_WORKSPACE_SIZE" in os.environ:
            del os.environ["CUBLASLT_WORKSPACE_SIZE"]

        _original_cublas_workspace_cfg = None
        _original_cublaslt_workspace_size = None

    _batch_invariant_MODE = False
    _batch_invariant_LIB = None
@contextlib.contextmanager
def set_batch_invariant_mode(enabled: bool = True):
    """Temporarily switch batch-invariant mode on or off for a ``with`` block.

    On entry the current ``(_batch_invariant_MODE, _batch_invariant_LIB)``
    pair is saved, then ``enable_batch_invariant_mode()`` is called when
    ``enabled`` is true and ``disable_batch_invariant_mode()`` otherwise.
    On exit, ``_destroy()`` is called on ``_batch_invariant_LIB`` if one is
    set, and the saved pair is assigned back to the module globals.

    Args:
        enabled: Whether the mode should be on inside the ``with`` body.

    Yields:
        None.
    """
    global _batch_invariant_MODE, _batch_invariant_LIB
    old_data = (_batch_invariant_MODE, _batch_invariant_LIB)
    if enabled:
        enable_batch_invariant_mode()
    else:
        disable_batch_invariant_mode()
    try:
        yield
    finally:
        # Run the teardown even when the ``with`` body raises; without the
        # ``finally`` an exception would leave the module globals stuck in
        # the temporary state.
        if _batch_invariant_LIB is not None:
            _batch_invariant_LIB._destroy()
        _batch_invariant_MODE, _batch_invariant_LIB = old_data
# Named pair of block dimensions with fields ``block_m`` and ``block_n``.
AttentionBlockSize = namedtuple(
    "AttentionBlockSize",
    ["block_m", "block_n"],
)


def get_batch_invariant_attention_block_size() -> AttentionBlockSize:
    """Return the fixed attention block size, ``block_m=16`` and ``block_n=16``."""
    edge = 16
    return AttentionBlockSize(edge, edge)
@
cache
def
vllm_is_batch_invariant
():
env_key
=
"VLLM_BATCH_INVARIANT"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment