Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
TransformerEngine
Commits
d8992315
Commit
d8992315
authored
Apr 09, 2025
by
yuguo
Browse files
[DCU] fix
parent
a207db1d
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
2 deletions
+6
-2
build_tools/pytorch.py
build_tools/pytorch.py
+1
-0
qa/L1_pytorch_distributed_unittest/test.sh
qa/L1_pytorch_distributed_unittest/test.sh
+1
-1
tests/pytorch/distributed/test_comm_gemm_overlap.py
tests/pytorch/distributed/test_comm_gemm_overlap.py
+4
-1
No files found.
build_tools/pytorch.py
View file @
d8992315
...
@@ -59,6 +59,7 @@ def setup_pytorch_extension(
...
@@ -59,6 +59,7 @@ def setup_pytorch_extension(
"-U__HIP_NO_BFLOAT162_OPERATORS__"
,
"-U__HIP_NO_BFLOAT162_OPERATORS__"
,
"-U__HIP_NO_BFLOAT162_CONVERSIONS__"
,
"-U__HIP_NO_BFLOAT162_CONVERSIONS__"
,
"-w"
,
"-w"
,
"-DUSE_ROCM"
,
]
]
else
:
else
:
nvcc_flags
=
[
nvcc_flags
=
[
...
...
qa/L1_pytorch_distributed_unittest/test.sh
View file @
d8992315
...
@@ -23,7 +23,7 @@ pip3 install pytest==8.2.1 || error_exit "Failed to install pytest"
...
@@ -23,7 +23,7 @@ pip3 install pytest==8.2.1 || error_exit "Failed to install pytest"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_numerics.py
||
test_fail
"test_numerics.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_numerics.py
||
test_fail
"test_numerics.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_fusible_ops.py
||
test_fail
"test_fusible_ops.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_fusible_ops.py
||
test_fail
"test_fusible_ops.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_torch_fsdp2.py
||
test_fail
"test_torch_fsdp2.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_torch_fsdp2.py
||
test_fail
"test_torch_fsdp2.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_comm_gemm_overlap.py
||
test_fail
"test_comm_gemm_overlap.py"
python3
-m
pytest
-v
-s
--log-cli-level
=
INFO
$TE_PATH
/tests/pytorch/distributed/test_comm_gemm_overlap.py
||
test_fail
"test_comm_gemm_overlap.py"
# python3 -m pytest -v -s $TE_PATH/tests/pytorch/distributed/test_fusible_ops_with_userbuffers.py || test_fail "test_fusible_ops_with_userbuffers.py" ### TODO Debug UB support with te.Sequential
# python3 -m pytest -v -s $TE_PATH/tests/pytorch/distributed/test_fusible_ops_with_userbuffers.py || test_fail "test_fusible_ops_with_userbuffers.py" ### TODO Debug UB support with te.Sequential
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/fused_attn/test_fused_attn_with_cp.py
||
test_fail
"test_fused_attn_with_cp.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/fused_attn/test_fused_attn_with_cp.py
||
test_fail
"test_fused_attn_with_cp.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_cast_master_weights_to_fp8.py
||
test_fail
"test_cast_master_weights_to_fp8.py"
python3
-m
pytest
-v
-s
$TE_PATH
/tests/pytorch/distributed/test_cast_master_weights_to_fp8.py
||
test_fail
"test_cast_master_weights_to_fp8.py"
...
...
tests/pytorch/distributed/test_comm_gemm_overlap.py
View file @
d8992315
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
#
# See LICENSE for license information.
# See LICENSE for license information.
# mpirun -np 4 --allow-run-as-root --oversubscribe --quiet python3 /home/TransformerEngine/tests/pytorch/distributed/run_gemm_with_overlap.py --check-numerics --seed=42 --seq-length=1024 --batch-size=2 --num-heads=16 --head-dim=48 --comm-type=AG --p2p
import
os
import
os
import
subprocess
import
subprocess
from
pathlib
import
Path
from
pathlib
import
Path
...
@@ -10,6 +11,7 @@ import torch
...
@@ -10,6 +11,7 @@ import torch
import
transformer_engine.pytorch
as
te
import
transformer_engine.pytorch
as
te
import
transformer_engine.pytorch.cpp_extensions
as
tex
import
transformer_engine.pytorch.cpp_extensions
as
tex
from
transformer_engine.pytorch.fp8
import
FP8GlobalStateManager
from
transformer_engine.pytorch.fp8
import
FP8GlobalStateManager
import
logging
if
torch
.
cuda
.
device_count
()
<
2
:
if
torch
.
cuda
.
device_count
()
<
2
:
pytest
.
skip
(
"Comm+GEMM overlap requires at least 2 GPUs."
)
pytest
.
skip
(
"Comm+GEMM overlap requires at least 2 GPUs."
)
...
@@ -37,7 +39,7 @@ TEST_ROOT = Path(__file__).parent.resolve()
...
@@ -37,7 +39,7 @@ TEST_ROOT = Path(__file__).parent.resolve()
NUM_PROCS
:
int
=
min
(
torch
.
cuda
.
device_count
(),
MAX_GPUS_TO_USE
)
NUM_PROCS
:
int
=
min
(
torch
.
cuda
.
device_count
(),
MAX_GPUS_TO_USE
)
LAUNCH_CMD
=
[
"torchrun"
,
f
"--nproc_per_node=
{
NUM_PROCS
}
"
]
LAUNCH_CMD
=
[
"torchrun"
,
f
"--nproc_per_node=
{
NUM_PROCS
}
"
]
if
tex
.
ubuf_built_with_mpi
():
if
tex
.
ubuf_built_with_mpi
():
LAUNCH_CMD
=
[
"mpirun"
,
"-np"
,
str
(
NUM_PROCS
),
"--oversubscribe"
,
"--quiet"
,
"python3"
]
LAUNCH_CMD
=
[
"mpirun"
,
"-np"
,
str
(
NUM_PROCS
),
"--allow-run-as-root"
,
"--oversubscribe"
,
"--quiet"
,
"python3"
]
# Fall back on CUDA IPC if the platform does not support CUDA multicast
# Fall back on CUDA IPC if the platform does not support CUDA multicast
if
not
tex
.
device_supports_multicast
():
if
not
tex
.
device_supports_multicast
():
...
@@ -77,6 +79,7 @@ def _run_gemm_with_overlap(comm_type, bulk, p2p, atomic, fp8):
...
@@ -77,6 +79,7 @@ def _run_gemm_with_overlap(comm_type, bulk, p2p, atomic, fp8):
pytest
.
skip
(
"Atomic GEMM is requires device compute capability 9.x (Hopper)."
)
pytest
.
skip
(
"Atomic GEMM is requires device compute capability 9.x (Hopper)."
)
test_cmd
.
append
(
"--atomic"
)
test_cmd
.
append
(
"--atomic"
)
logging
.
info
(
f
"test_cmd:
{
test_cmd
}
"
)
result
=
subprocess
.
run
(
test_cmd
,
env
=
os
.
environ
,
capture_output
=
True
,
check
=
False
)
result
=
subprocess
.
run
(
test_cmd
,
env
=
os
.
environ
,
capture_output
=
True
,
check
=
False
)
if
(
if
(
result
.
returncode
!=
0
result
.
returncode
!=
0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment