Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3ddcf460
Unverified
Commit
3ddcf460
authored
Nov 17, 2025
by
Wentao Ye
Committed by
GitHub
Nov 17, 2025
Browse files
[Refactor] Remove Unused Func in Batch Invariant (#28881)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
d0a73620
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
73 deletions
+0
-73
vllm/model_executor/layers/batch_invariant.py
vllm/model_executor/layers/batch_invariant.py
+0
-73
No files found.
vllm/model_executor/layers/batch_invariant.py
View file @
3ddcf460
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
contextlib
import
os
import
os
from
collections
import
namedtuple
from
collections.abc
import
Callable
from
collections.abc
import
Callable
from
functools
import
cache
from
functools
import
cache
from
typing
import
Any
from
typing
import
Any
...
@@ -725,10 +723,6 @@ _original_cublas_workspace_cfg = None
...
@@ -725,10 +723,6 @@ _original_cublas_workspace_cfg = None
_original_cublaslt_workspace_size
=
None
_original_cublaslt_workspace_size
=
None
def
is_batch_invariant_mode_enabled
():
return
_batch_invariant_MODE
def
enable_batch_invariant_mode
():
def
enable_batch_invariant_mode
():
global
_batch_invariant_MODE
,
_batch_invariant_LIB
,
_original_torch_bmm
global
_batch_invariant_MODE
,
_batch_invariant_LIB
,
_original_torch_bmm
global
_original_fp16_reduction_precision
,
_original_bf16_reduction_precision
global
_original_fp16_reduction_precision
,
_original_bf16_reduction_precision
...
@@ -791,73 +785,6 @@ def enable_batch_invariant_mode():
...
@@ -791,73 +785,6 @@ def enable_batch_invariant_mode():
torch
.
backends
.
cuda
.
preferred_blas_library
(
backend
=
"cublaslt"
)
torch
.
backends
.
cuda
.
preferred_blas_library
(
backend
=
"cublaslt"
)
def
disable_batch_invariant_mode
():
global
_batch_invariant_MODE
,
_batch_invariant_LIB
,
_original_torch_bmm
global
_original_fp16_reduction_precision
,
_original_bf16_reduction_precision
global
_original_cublas_workspace_cfg
,
_original_cublaslt_workspace_size
if
not
_batch_invariant_MODE
:
return
if
_batch_invariant_LIB
is
not
None
:
_batch_invariant_LIB
.
_destroy
()
if
_original_torch_bmm
is
not
None
:
torch
.
bmm
=
_original_torch_bmm
_original_torch_bmm
=
None
if
_original_bf16_reduction_precision
is
not
None
:
torch
.
backends
.
cuda
.
matmul
.
allow_bf16_reduced_precision_reduction
=
(
_original_bf16_reduction_precision
)
_original_bf16_reduction_precision
=
None
if
_original_fp16_reduction_precision
is
not
None
:
torch
.
backends
.
cuda
.
matmul
.
allow_fp16_reduced_precision_reduction
=
(
_original_fp16_reduction_precision
)
_original_fp16_reduction_precision
=
None
torch
.
backends
.
cuda
.
preferred_blas_library
(
backend
=
"default"
)
if
not
is_torch_equal_or_newer
(
"2.10.0.dev"
):
# Set cublas env vars to previous results. If previous results are None,
# that means the env vars were not set, so we should remove them.
if
_original_cublas_workspace_cfg
:
os
.
environ
[
"CUBLAS_WORKSPACE_CONFIG"
]
=
_original_cublas_workspace_cfg
elif
"CUBLAS_WORKSPACE_CONFIG"
in
os
.
environ
:
del
os
.
environ
[
"CUBLAS_WORKSPACE_CONFIG"
]
if
_original_cublaslt_workspace_size
:
os
.
environ
[
"CUBLASLT_WORKSPACE_SIZE"
]
=
_original_cublaslt_workspace_size
elif
"CUBLASLT_WORKSPACE_SIZE"
in
os
.
environ
:
del
os
.
environ
[
"CUBLASLT_WORKSPACE_SIZE"
]
_original_cublas_workspace_cfg
=
None
_original_cublaslt_workspace_size
=
None
_batch_invariant_MODE
=
False
_batch_invariant_LIB
=
None
@
contextlib
.
contextmanager
def
set_batch_invariant_mode
(
enabled
:
bool
=
True
):
global
_batch_invariant_MODE
,
_batch_invariant_LIB
old_data
=
(
_batch_invariant_MODE
,
_batch_invariant_LIB
)
if
enabled
:
enable_batch_invariant_mode
()
else
:
disable_batch_invariant_mode
()
yield
if
_batch_invariant_LIB
is
not
None
:
_batch_invariant_LIB
.
_destroy
()
_batch_invariant_MODE
,
_batch_invariant_LIB
=
old_data
AttentionBlockSize
=
namedtuple
(
"AttentionBlockSize"
,
[
"block_m"
,
"block_n"
])
def
get_batch_invariant_attention_block_size
()
->
AttentionBlockSize
:
return
AttentionBlockSize
(
block_m
=
16
,
block_n
=
16
)
@
cache
@
cache
def
vllm_is_batch_invariant
():
def
vllm_is_batch_invariant
():
env_key
=
"VLLM_BATCH_INVARIANT"
env_key
=
"VLLM_BATCH_INVARIANT"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment