Unverified commit 8f87eb46, authored by Wentao Ye and committed by GitHub
Browse files

[Refactor] Clean up log once `scope="local"` (#40540)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent cfa49213
...@@ -29,7 +29,6 @@ def _get_device_and_group(parallel_config: ParallelConfig): ...@@ -29,7 +29,6 @@ def _get_device_and_group(parallel_config: ParallelConfig):
if parallel_config.disable_nccl_for_dp_synchronization: if parallel_config.disable_nccl_for_dp_synchronization:
logger.info_once( logger.info_once(
"Using CPU all reduce to synchronize DP padding between ranks.", "Using CPU all reduce to synchronize DP padding between ranks.",
scope="local",
) )
device = "cpu" device = "cpu"
group = get_dp_group().cpu_group group = get_dp_group().cpu_group
......
...@@ -92,9 +92,7 @@ class EPLBController: ...@@ -92,9 +92,7 @@ class EPLBController:
if not is_mixture_of_experts(model): if not is_mixture_of_experts(model):
return False return False
logger.info_once( logger.info_once("EPLB is enabled for model %s.", model_config.model)
"EPLB is enabled for model %s.", model_config.model, scope="local"
)
assert self.state is not None assert self.state is not None
self.state.add_model(model, model_config) self.state.add_model(model, model_config)
self._has_registered_models = True self._has_registered_models = True
......
...@@ -4855,7 +4855,6 @@ class GPUModelRunner( ...@@ -4855,7 +4855,6 @@ class GPUModelRunner(
"Model loading took %s GiB memory and %.6f seconds", "Model loading took %s GiB memory and %.6f seconds",
format_gib(self.model_memory_usage), format_gib(self.model_memory_usage),
time_after_load - time_before_load, time_after_load - time_before_load,
scope="local",
) )
if not load_dummy_weights: if not load_dummy_weights:
prepare_communication_buffer_for_model(self.model) prepare_communication_buffer_for_model(self.model)
...@@ -4989,7 +4988,7 @@ class GPUModelRunner( ...@@ -4989,7 +4988,7 @@ class GPUModelRunner(
) )
# begin loading weights # begin loading weights
logger.info_once("Reloading weights inplace...", scope="local") logger.info_once("Reloading weights inplace...")
if is_checkpoint_format: if is_checkpoint_format:
# load weights from checkpoint/ original model format # load weights from checkpoint/ original model format
initialize_layerwise_reload(model) initialize_layerwise_reload(model)
...@@ -5001,7 +5000,6 @@ class GPUModelRunner( ...@@ -5001,7 +5000,6 @@ class GPUModelRunner(
logger.warning_once( logger.warning_once(
"Reloading with `is_checkpoint_format=True` requires that " "Reloading with `is_checkpoint_format=True` requires that "
"weights be in kernel format and already sharded", "weights be in kernel format and already sharded",
scope="local",
) )
loaded_weights = set() loaded_weights = set()
for name, loaded_weight in weights_iterator: for name, loaded_weight in weights_iterator:
...@@ -5015,7 +5013,6 @@ class GPUModelRunner( ...@@ -5015,7 +5013,6 @@ class GPUModelRunner(
logger.info_once( logger.info_once(
"Reloading and processing weights took %.2f seconds", "Reloading and processing weights took %.2f seconds",
diff_seconds, diff_seconds,
scope="local",
) )
if self.model_config.quantization is None and loaded_weights is not None: if self.model_config.quantization is None and loaded_weights is not None:
weights_not_loaded = weights_to_load - loaded_weights weights_not_loaded = weights_to_load - loaded_weights
...@@ -5802,7 +5799,6 @@ class GPUModelRunner( ...@@ -5802,7 +5799,6 @@ class GPUModelRunner(
encoder_budget, encoder_budget,
max_mm_items_per_batch, max_mm_items_per_batch,
dummy_modality, dummy_modality,
scope="local",
) )
# Create dummy batch of multimodal inputs. # Create dummy batch of multimodal inputs.
...@@ -6099,7 +6095,6 @@ class GPUModelRunner( ...@@ -6099,7 +6095,6 @@ class GPUModelRunner(
"Graph capturing finished in %.0f secs, took %.2f GiB", "Graph capturing finished in %.0f secs, took %.2f GiB",
elapsed_time, elapsed_time,
cuda_graph_size / (1 << 30), cuda_graph_size / (1 << 30),
scope="local",
) )
return cuda_graph_size return cuda_graph_size
......
...@@ -269,7 +269,7 @@ class Worker(WorkerBase): ...@@ -269,7 +269,7 @@ class Worker(WorkerBase):
) )
if self.use_v2_model_runner: if self.use_v2_model_runner:
logger.info_once("Using V2 Model Runner", scope="local") logger.info_once("Using V2 Model Runner")
# Set random seed. # Set random seed.
set_random_seed(self.model_config.seed) set_random_seed(self.model_config.seed)
...@@ -440,7 +440,6 @@ class Worker(WorkerBase): ...@@ -440,7 +440,6 @@ class Worker(WorkerBase):
logger.info_once( logger.info_once(
"Available KV cache memory: %s GiB", "Available KV cache memory: %s GiB",
format_gib(self.available_kv_cache_memory_bytes), format_gib(self.available_kv_cache_memory_bytes),
scope="local",
) )
if cudagraph_memory_estimate > 0: if cudagraph_memory_estimate > 0:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment