Commit bfd4074f authored by yuguo
Browse files

Merge branch 'develop_v2.4' into 'main'

[DCU] avoid rtc trans kernel bug (need fix)

See merge request dcutoolkit/deeplearing/TransformerEngine!26
parents 84e198a3 fdb21575
...@@ -79,11 +79,11 @@ def setup_pytorch_extension( ...@@ -79,11 +79,11 @@ def setup_pytorch_extension(
] ]
) )
if bool(int(os.getenv("NVTE_BUILD_SUPPRESS_RETURN_TYPE_WARNING", "0"))): if bool(int(os.getenv("NVTE_BUILD_SUPPRESS_RETURN_TYPE_WARNING", "1"))):
nvcc_flags.append("-Wno-return-type") nvcc_flags.append("-Wno-return-type")
cxx_flags.append("-Wno-return-type") cxx_flags.append("-Wno-return-type")
if bool(int(os.getenv("NVTE_BUILD_SUPPRESS_SIGN_COMPARE", "0"))): if bool(int(os.getenv("NVTE_BUILD_SUPPRESS_SIGN_COMPARE", "1"))):
nvcc_flags.append("-Wno-sign-compare") nvcc_flags.append("-Wno-sign-compare")
cxx_flags.append("-Wno-sign-compare") cxx_flags.append("-Wno-sign-compare")
......
...@@ -9,7 +9,7 @@ from pathlib import Path ...@@ -9,7 +9,7 @@ from pathlib import Path
import pytest import pytest
import torch import torch
from transformer_engine.pytorch.fp8 import FP8GlobalStateManager from transformer_engine.pytorch.fp8 import FP8GlobalStateManager
# NVTE_INT8_SIM_FP8=1 torchrun --nproc_per_node=4 run_cast_master_weights_to_fp8.py --quantization fp8_block # NVTE_DISABLE_NVRTC=1 NVTE_INT8_SIM_FP8=1 torchrun --nproc_per_node=4 run_cast_master_weights_to_fp8.py --quantization fp8_block
if torch.cuda.device_count() < 2: if torch.cuda.device_count() < 2:
pytest.skip("cast_master_weights_to_fp8 test needs at least 2 GPUs.") pytest.skip("cast_master_weights_to_fp8 test needs at least 2 GPUs.")
......
...@@ -437,7 +437,7 @@ def _cast_master_weights_to_fp8_blockwise_scaling( ...@@ -437,7 +437,7 @@ def _cast_master_weights_to_fp8_blockwise_scaling(
# We cannot create columnwise data here because users (like megatron) may want to overlap # We cannot create columnwise data here because users (like megatron) may want to overlap
# the all-gather of model weights and forward process, so the model weight is not updated # the all-gather of model weights and forward process, so the model weight is not updated
# at this moment. # at this moment.
model_weight.update_usage(rowwise_usage=True, columnwise_usage=False) # May cause core dump in iter 2 model_weight.update_usage(rowwise_usage=True, columnwise_usage=False)
# If master weight is None, it means that the master weight of the current model weight # If master weight is None, it means that the master weight of the current model weight
# is in other DP ranks. # is in other DP ranks.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment