"tests/test_layers_utils.py" did not exist on "0183bf13c73b34be2ff1ff1853bc53970046d700"
Commit c0a84df5 authored by Anthony Chen, committed by Facebook GitHub Bot
Browse files

disable memory profiler by default + remove force disable + add logging

Summary: Pull Request resolved: https://github.com/facebookresearch/d2go/pull/581

Reviewed By: wat3rBro

Differential Revision: D46913792

fbshipit-source-id: cf3c3812c455091fbf63842443644d2571976017
parent 7f17bbf0
......@@ -343,8 +343,7 @@ class Detectron2GoRunner(D2GoDataAPIMixIn, BaseRunner):
def build_model(self, cfg, eval_only=False):
# Attach memory profiler to GPU OOM events
# Disabled since it can cause ranks to die
if False and cfg.get("MEMORY_PROFILER", CfgNode()).get("ENABLED", False):
if cfg.get("MEMORY_PROFILER", CfgNode()).get("ENABLED", False):
attach_oom_logger(
cfg.OUTPUT_DIR, trace_max_entries=cfg.MEMORY_PROFILER.TRACE_MAX_ENTRIES
)
......
......@@ -51,6 +51,9 @@ class D2GoGpuMemorySnapshot(HookBase):
self.log_n_steps = log_n_steps
self.log_during_train_at = log_during_train_at
self.trace_max_entries = trace_max_entries
logger.warning(
"WARNING: Memory snapshot profiler is enabled. This may cause ranks to die and training jobs to get stuck. Please use with caution."
)
def before_step(self):
if self.trainer.iter == self.log_during_train_at:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment