Commit 8f87eb46 (unverified), authored by Wentao Ye and committed by GitHub
Browse files

[Refactor] Clean up log once `scope="local"` (#40540)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent cfa49213
......@@ -29,7 +29,6 @@ def _get_device_and_group(parallel_config: ParallelConfig):
if parallel_config.disable_nccl_for_dp_synchronization:
logger.info_once(
"Using CPU all reduce to synchronize DP padding between ranks.",
scope="local",
)
device = "cpu"
group = get_dp_group().cpu_group
......
......@@ -92,9 +92,7 @@ class EPLBController:
if not is_mixture_of_experts(model):
return False
logger.info_once(
"EPLB is enabled for model %s.", model_config.model, scope="local"
)
logger.info_once("EPLB is enabled for model %s.", model_config.model)
assert self.state is not None
self.state.add_model(model, model_config)
self._has_registered_models = True
......
......@@ -4855,7 +4855,6 @@ class GPUModelRunner(
"Model loading took %s GiB memory and %.6f seconds",
format_gib(self.model_memory_usage),
time_after_load - time_before_load,
scope="local",
)
if not load_dummy_weights:
prepare_communication_buffer_for_model(self.model)
......@@ -4989,7 +4988,7 @@ class GPUModelRunner(
)
# begin loading weights
logger.info_once("Reloading weights inplace...", scope="local")
logger.info_once("Reloading weights inplace...")
if is_checkpoint_format:
# load weights from checkpoint/ original model format
initialize_layerwise_reload(model)
......@@ -5001,7 +5000,6 @@ class GPUModelRunner(
logger.warning_once(
"Reloading with `is_checkpoint_format=True` requires that "
"weights be in kernel format and already sharded",
scope="local",
)
loaded_weights = set()
for name, loaded_weight in weights_iterator:
......@@ -5015,7 +5013,6 @@ class GPUModelRunner(
logger.info_once(
"Reloading and processing weights took %.2f seconds",
diff_seconds,
scope="local",
)
if self.model_config.quantization is None and loaded_weights is not None:
weights_not_loaded = weights_to_load - loaded_weights
......@@ -5802,7 +5799,6 @@ class GPUModelRunner(
encoder_budget,
max_mm_items_per_batch,
dummy_modality,
scope="local",
)
# Create dummy batch of multimodal inputs.
......@@ -6099,7 +6095,6 @@ class GPUModelRunner(
"Graph capturing finished in %.0f secs, took %.2f GiB",
elapsed_time,
cuda_graph_size / (1 << 30),
scope="local",
)
return cuda_graph_size
......
......@@ -269,7 +269,7 @@ class Worker(WorkerBase):
)
if self.use_v2_model_runner:
logger.info_once("Using V2 Model Runner", scope="local")
logger.info_once("Using V2 Model Runner")
# Set random seed.
set_random_seed(self.model_config.seed)
......@@ -440,7 +440,6 @@ class Worker(WorkerBase):
logger.info_once(
"Available KV cache memory: %s GiB",
format_gib(self.available_kv_cache_memory_bytes),
scope="local",
)
if cudagraph_memory_estimate > 0:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment