Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
af9a7ec2
Unverified
Commit
af9a7ec2
authored
Jan 05, 2026
by
Wentao Ye
Committed by
GitHub
Jan 05, 2026
Browse files
[Bug] Revert torch warning fix (#31585)
Signed-off-by:
yewentao256
<
zhyanwentao@126.com
>
parent
276e03b9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
8 deletions
+6
-8
tests/v1/e2e/test_async_scheduling.py
tests/v1/e2e/test_async_scheduling.py
+1
-1
vllm/envs.py
vllm/envs.py
+4
-6
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/gpu_worker.py
+1
-1
No files found.
tests/v1/e2e/test_async_scheduling.py
View file @
af9a7ec2
...
...
@@ -154,7 +154,7 @@ def run_tests(
with
monkeypatch
.
context
()
as
m
:
# lock matmul precision to full FP32 (IEEE)
m
.
setenv
(
"VLLM_FLOAT32_MATMUL_PRECISION"
,
"
ieee
"
)
m
.
setenv
(
"VLLM_FLOAT32_MATMUL_PRECISION"
,
"
highest
"
)
# m.setenv("VLLM_BATCH_INVARIANT", "1")
outputs
:
list
[
tuple
[
str
,
list
,
list
]]
=
[]
for
n
,
(
...
...
vllm/envs.py
View file @
af9a7ec2
...
...
@@ -75,7 +75,7 @@ if TYPE_CHECKING:
VLLM_MEDIA_CONNECTOR
:
str
=
"http"
VLLM_TARGET_DEVICE
:
str
=
"cuda"
VLLM_MAIN_CUDA_VERSION
:
str
=
"12.9"
VLLM_FLOAT32_MATMUL_PRECISION
:
Literal
[
"
ieee"
,
"tf32"
]
=
"ieee
"
VLLM_FLOAT32_MATMUL_PRECISION
:
Literal
[
"
highest"
,
"high"
,
"medium"
]
=
"highest
"
MAX_JOBS
:
str
|
None
=
None
NVCC_THREADS
:
str
|
None
=
None
VLLM_USE_PRECOMPILED
:
bool
=
False
...
...
@@ -459,13 +459,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_MAIN_CUDA_VERSION"
:
lambda
:
os
.
getenv
(
"VLLM_MAIN_CUDA_VERSION"
,
""
).
lower
()
or
"12.9"
,
# Controls PyTorch float32 matmul precision mode within vLLM workers.
# Accepted values:
# - "ieee" (default): force full IEEE FP32 matmul precision.
# - "tf32": enable TensorFloat32-based fast matmul.
# Valid options mirror torch.set_float32_matmul_precision
"VLLM_FLOAT32_MATMUL_PRECISION"
:
env_with_choices
(
"VLLM_FLOAT32_MATMUL_PRECISION"
,
"
ieee
"
,
[
"
ieee"
,
"tf32
"
],
"
highest
"
,
[
"
highest"
,
"high"
,
"medium
"
],
case_sensitive
=
False
,
),
# Maximum number of compilation jobs to run in parallel.
...
...
vllm/v1/worker/gpu_worker.py
View file @
af9a7ec2
...
...
@@ -84,7 +84,7 @@ class Worker(WorkerBase):
# configure float32 matmul precision according to vLLM env.
precision
=
envs
.
VLLM_FLOAT32_MATMUL_PRECISION
torch
.
backends
.
cuda
.
matmul
.
fp32
_precision
=
precision
torch
.
set_float32_
matmul_precision
(
precision
)
if
self
.
model_config
.
trust_remote_code
:
# note: lazy import to avoid importing torch before initializing
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment