Unverified Commit 59d0bf01 authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Tiny add warnings for DeepEP when it is suboptimal (#8426)

parent 7df2c0c2
...@@ -157,6 +157,20 @@ class DeepEPBuffer: ...@@ -157,6 +157,20 @@ class DeepEPBuffer:
else: else:
raise NotImplementedError raise NotImplementedError
total_num_sms = torch.cuda.get_device_properties(
device="cuda"
).multi_processor_count
if (
(deepep_mode != DeepEPMode.low_latency)
and not global_server_args_dict["enable_two_batch_overlap"]
and (DeepEPConfig.get_instance().num_sms < total_num_sms // 2)
):
logger.warning(
f"Only use {DeepEPConfig.get_instance().num_sms} SMs for DeepEP communication. "
f"This may result in highly suboptimal performance. "
f"Consider using --deepep-config to change the behavior."
)
cls._buffer = Buffer( cls._buffer = Buffer(
group, group,
num_nvl_bytes, num_nvl_bytes,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment