Temporay work-around for rocm 7.0.0 alpha with enabling data-parallel issue (#10434)

Co-authored-by: wunhuang <wunhuang@amd.com> Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>

Temporay work-around for rocm 7.0.0 alpha with enabling data-parallel issue (#10434)
Co-authored-by: wunhuang <wunhuang@amd.com> Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
ec272dda · kk · GitHub · a220c14f · ec272dda
Unverified Commit ec272dda authored Sep 16, 2025 by kk Committed by GitHub Sep 15, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 1 deletion

python/sglang/srt/layers/dp_attention.py python/sglang/srt/layers/dp_attention.py +10 -1

No files found.
--- a/python/sglang/srt/layers/dp_attention.py
+++ b/python/sglang/srt/layers/dp_attention.py
@@ -17,6 +17,7 @@ from sglang.srt.distributed import (
    get_tp_group,
    tensor_model_parallel_all_reduce,
 )
+from sglang.srt.utils import get_bool_env_var, is_hip
 if TYPE_CHECKING:
    from sglang.srt.configs.model_config import ModelConfig
@@ -36,6 +37,9 @@ _LOCAL_ATTN_DP_SIZE: Optional[int] = None
 _LOCAL_ATTN_DP_RANK: Optional[int] = None
 _ENABLE_DP_ATTENTION_FLAG: bool = False
+_is_hip = is_hip()
+_USE_ROCM700A_WA = _is_hip and get_bool_env_var("SGLANG_USE_ROCM700A")
 class DpPaddingMode(IntEnum):
@@ -67,7 +71,12 @@ class DpPaddingMode(IntEnum):
    @classmethod
    def get_default_mode_in_cuda_graph(cls) -> DpPaddingMode:
-        return cls.MAX_LEN
+        # TODO(kkhuang-amd): noqa, temporary work-around for rocm 7.0.0 alpha
+        # it can be safely removed later, once RCCL fixed
+        if _USE_ROCM700A_WA:
+            return cls.SUM_LEN
+        else:
+            return cls.MAX_LEN
 class _DpGatheredBufferWrapper: