Unverified Commit 7e5071c9 authored by fzyzcjy's avatar fzyzcjy Committed by GitHub
Browse files

Super tiny enable sole usage of expert distribution metrics and update doc (#6680)

parent 78689d33
......@@ -27,7 +27,8 @@ class EPLBManager:
<= self._server_args.expert_distribution_recorder_buffer_size
), "eplb_rebalance_num_iterations must be less than expert_distribution_recorder_buffer_size"
get_global_expert_distribution_recorder().start_record()
if not get_global_expert_distribution_recorder().recording:
get_global_expert_distribution_recorder().start_record()
logger.info(
f"[EPLBManager] system started, will rebalance per {self._server_args.eplb_rebalance_num_iterations} iterations."
......
......@@ -91,6 +91,10 @@ class ExpertDistributionRecorder(ABC):
def dump_record(self, output_mode: _OutputMode = "file"):
    """Base-class placeholder for dumping the recorded expert distribution.

    This implementation does not dump anything; it only delegates to
    ``self._on_not_implemented()``.

    Args:
        output_mode: Requested output mode (defaults to ``"file"``). It is
            not read by this base implementation.
    """
    # NOTE(review): _on_not_implemented() appears to raise an Exception asking
    # the user to set ServerArgs.expert_distribution_recorder_mode - confirm
    # against its full definition.
    self._on_not_implemented()
@property
def recording(self) -> bool:
    """Report whether this recorder is currently recording.

    The base implementation always returns ``False``, so callers can query
    the flag without first checking which recorder class they hold.
    """
    return False
def _on_not_implemented(self):
raise Exception(
"Please set ServerArgs.expert_distribution_recorder_mode to use ExpertDistributionRecorder."
......@@ -123,6 +127,12 @@ class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
for k in self._accumulator.get_single_pass_gatherer_keys()
}
if server_args.enable_expert_distribution_metrics:
logger.info(
"ExpertDistributionRecorder auto start record since enable_expert_distribution_metrics"
)
self.start_record()
def with_current_layer(self, layer_idx):
    """Return ``self._current_layer_idx.with_value(layer_idx)``.

    Args:
        layer_idx: Value forwarded unchanged to ``with_value``.

    Returns:
        Whatever ``self._current_layer_idx.with_value`` returns.
    """
    # NOTE(review): presumably with_value() yields a context manager that
    # scopes the current layer index - confirm against its definition.
    return self._current_layer_idx.with_value(layer_idx)
......@@ -221,6 +231,10 @@ class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
self._reset()
return output
@property
def recording(self):
    """Expose the recorder's internal ``_recording`` attribute read-only."""
    # NOTE(review): presumably a bool toggled by start_record()/stop_record()
    # - confirm where self._recording is assigned.
    return self._recording
_global_expert_distribution_recorder: Optional[ExpertDistributionRecorder] = (
_ExpertDistributionRecorderNoop()
......
......@@ -1355,7 +1355,7 @@ class ServerArgs:
"--deepep-config",
type=str,
default=ServerArgs.deepep_config,
help="Tuned DeepEP config suitable for your own cluster.",
help="Tuned DeepEP config suitable for your own cluster. It can be either a string with JSON content or a file path.",
)
parser.add_argument(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment