Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
TransformerEngine
Commits
fdb21575
Commit
fdb21575
authored
Jun 10, 2025
by
yuguo
Browse files
[DCU] avoid rtc trans kernel bug (need fix)
parent
7d2b9c77
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
4 deletions
+4
-4
build_tools/pytorch.py
build_tools/pytorch.py
+2
-2
tests/pytorch/distributed/test_cast_master_weights_to_fp8.py
tests/pytorch/distributed/test_cast_master_weights_to_fp8.py
+1
-1
transformer_engine/pytorch/tensor/utils.py
transformer_engine/pytorch/tensor/utils.py
+1
-1
No files found.
build_tools/pytorch.py
View file @
fdb21575
...
@@ -79,11 +79,11 @@ def setup_pytorch_extension(
...
@@ -79,11 +79,11 @@ def setup_pytorch_extension(
]
]
)
)
if
bool
(
int
(
os
.
getenv
(
"NVTE_BUILD_SUPPRESS_RETURN_TYPE_WARNING"
,
"
0
"
))):
if
bool
(
int
(
os
.
getenv
(
"NVTE_BUILD_SUPPRESS_RETURN_TYPE_WARNING"
,
"
1
"
))):
nvcc_flags
.
append
(
"-Wno-return-type"
)
nvcc_flags
.
append
(
"-Wno-return-type"
)
cxx_flags
.
append
(
"-Wno-return-type"
)
cxx_flags
.
append
(
"-Wno-return-type"
)
if
bool
(
int
(
os
.
getenv
(
"NVTE_BUILD_SUPPRESS_SIGN_COMPARE"
,
"
0
"
))):
if
bool
(
int
(
os
.
getenv
(
"NVTE_BUILD_SUPPRESS_SIGN_COMPARE"
,
"
1
"
))):
nvcc_flags
.
append
(
"-Wno-sign-compare"
)
nvcc_flags
.
append
(
"-Wno-sign-compare"
)
cxx_flags
.
append
(
"-Wno-sign-compare"
)
cxx_flags
.
append
(
"-Wno-sign-compare"
)
...
...
tests/pytorch/distributed/test_cast_master_weights_to_fp8.py
View file @
fdb21575
...
@@ -9,7 +9,7 @@ from pathlib import Path
...
@@ -9,7 +9,7 @@ from pathlib import Path
import
pytest
import
pytest
import
torch
import
torch
from
transformer_engine.pytorch.fp8
import
FP8GlobalStateManager
from
transformer_engine.pytorch.fp8
import
FP8GlobalStateManager
# NVTE_INT8_SIM_FP8=1 torchrun --nproc_per_node=4 run_cast_master_weights_to_fp8.py --quantization fp8_block
#
NVTE_DISABLE_NVRTC=1
NVTE_INT8_SIM_FP8=1 torchrun --nproc_per_node=4 run_cast_master_weights_to_fp8.py --quantization fp8_block
if
torch
.
cuda
.
device_count
()
<
2
:
if
torch
.
cuda
.
device_count
()
<
2
:
pytest
.
skip
(
"cast_master_weights_to_fp8 test needs at least 2 GPUs."
)
pytest
.
skip
(
"cast_master_weights_to_fp8 test needs at least 2 GPUs."
)
...
...
transformer_engine/pytorch/tensor/utils.py
View file @
fdb21575
...
@@ -437,7 +437,7 @@ def _cast_master_weights_to_fp8_blockwise_scaling(
...
@@ -437,7 +437,7 @@ def _cast_master_weights_to_fp8_blockwise_scaling(
# We cannot create columnwise data here because users (like megatron) may want to overlap
# We cannot create columnwise data here because users (like megatron) may want to overlap
# the all-gather of model weights and forward process, so the model weight is not updated
# the all-gather of model weights and forward process, so the model weight is not updated
# at this moment.
# at this moment.
model_weight
.
update_usage
(
rowwise_usage
=
True
,
columnwise_usage
=
False
)
# May cause core dump in iter 2
model_weight
.
update_usage
(
rowwise_usage
=
True
,
columnwise_usage
=
False
)
# If master weight is None, it means that the master weight of the current model weight
# If master weight is None, it means that the master weight of the current model weight
# is in other DP ranks.
# is in other DP ranks.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment