Commit 24b7c3c0 authored by Lawrence McAfee
Browse files

Added memory stats (allocated/reserved) to TensorBoard logging

parent 90e0a0dd
...@@ -328,6 +328,10 @@ def _add_logging_args(parser): ...@@ -328,6 +328,10 @@ def _add_logging_args(parser):
action='store_true', action='store_true',
help='If set, write validation perplexity to ' help='If set, write validation perplexity to '
'tensorboard.') 'tensorboard.')
group.add_argument('--no-log-memory-to-tensorboard',
action='store_false',
help='Disable memory logging to tensorboard.',
dest='log_memory_to_tensorboard')
return parser return parser
......
...@@ -531,6 +531,23 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration, ...@@ -531,6 +531,23 @@ def training_log(loss_dict, total_loss_dict, learning_rate, iteration,
if args.log_timers_to_tensorboard: if args.log_timers_to_tensorboard:
timers.write(timers_to_log, writer, iteration, timers.write(timers_to_log, writer, iteration,
normalizer=total_iterations) normalizer=total_iterations)
if args.log_memory_to_tensorboard:
mem_stats = torch.cuda.memory_stats()
writer.add_scalar(
"mem-reserved-bytes",
mem_stats["reserved_bytes.all.current"],
iteration,
)
writer.add_scalar(
"mem-allocated-bytes",
mem_stats["allocated_bytes.all.current"],
iteration,
)
writer.add_scalar(
"mem-allocated-count",
mem_stats["allocation.all.current"],
iteration,
)
if iteration % args.log_interval == 0: if iteration % args.log_interval == 0:
elapsed_time = timers('interval-time').elapsed() elapsed_time = timers('interval-time').elapsed()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment