Commit 35449bf1 authored by Marta

perf callback

parent 5a004f04
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import operator
import os
import time

import dllogger as logger
import numpy as np
import torch.cuda.profiler as profiler
from dllogger import JSONStreamBackend, StdOutBackend, Verbosity
from pytorch_lightning import Callback


def is_main_process():
    # Only the rank-0 process should emit performance logs.
    return int(os.getenv("LOCAL_RANK", "0")) == 0
class PerformanceLoggingCallback(Callback):
    def __init__(self, log_dir, global_batch_size, warmup_steps: int = 0, profile: bool = False):
        logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_dir), StdOutBackend(Verbosity.VERBOSE)])
        self.warmup_steps = warmup_steps
        self.global_batch_size = global_batch_size
        self.step = 0
        self.profile = profile
        self.timestamps = []

    def do_step(self):
        self.step += 1
        if self.profile and self.step == self.warmup_steps:
            profiler.start()
        # Timestamps are only collected once the warmup period is over.
        if self.step > self.warmup_steps:
            self.timestamps.append(time.time())

    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
        if trainer.current_epoch == 1:
            self.do_step()

    def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
        if trainer.current_epoch == 1:
            self.do_step()

    def process_performance_stats(self, deltas):
        def _round3(val):
            return round(val, 3)

        throughput_imgps = _round3(self.global_batch_size / np.mean(deltas))
        timestamps_ms = 1000 * deltas
        stats = {
            "throughput": throughput_imgps,
            "latency_mean": _round3(timestamps_ms.mean()),
        }
        for level in [90, 95, 99]:
            stats.update({f"latency_{level}": _round3(np.percentile(timestamps_ms, level))})
        return stats

    def _log(self):
        if is_main_process():
            # Per-step durations: pairwise differences of consecutive timestamps.
            diffs = list(map(operator.sub, self.timestamps[1:], self.timestamps[:-1]))
            deltas = np.array(diffs)
            stats = self.process_performance_stats(deltas)
            logger.log(step=(), data=stats)
            logger.flush()

    def on_train_end(self, trainer, pl_module):
        if self.profile:
            profiler.stop()
        self._log()

    def on_epoch_end(self, trainer, pl_module):
        self._log()
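
For context, a minimal sketch of wiring this callback into a Lightning Trainer on its own; the model, batch size, and Trainer settings below are illustrative assumptions, not part of this commit:

    # Hypothetical standalone usage; DummyModel and the Trainer arguments
    # are placeholders for whatever the training script actually provides.
    import pytorch_lightning as pl

    perf = PerformanceLoggingCallback(
        log_dir="perf_log.json",   # path handed to dllogger's JSONStreamBackend
        global_batch_size=8,       # samples processed per step across all ranks
        warmup_steps=10,           # skip the first 10 steps when timing
    )
    trainer = pl.Trainer(max_epochs=2, callbacks=[perf])
    # trainer.fit(DummyModel())   # timestamps are collected from epoch 1 onward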
@@ -34,6 +34,8 @@ from scripts.zero_to_fp32 import (
    get_fp32_state_dict_from_zero_checkpoint
)

from openfold.utils.logger import PerformanceLoggingCallback


class OpenFoldWrapper(pl.LightningModule):
    def __init__(self, config):
@@ -147,6 +149,13 @@ def main(args):
        strict=True,
    )
    callbacks.append(es)

    if args.log_performance:
        global_batch_size = args.num_nodes * args.gpus
        perf = PerformanceLoggingCallback(
            log_dir=args.output_dir,
            global_batch_size=global_batch_size,
        )
        callbacks.append(perf)

    if(args.deepspeed_config_path is not None):
        strategy = DeepSpeedPlugin(config=args.deepspeed_config_path)
@@ -271,6 +280,10 @@ if __name__ == "__main__":
        "--resume_model_weights_only", type=bool, default=False,
        help="Whether to load just model weights as opposed to training state"
    )
    parser.add_argument(
        "--log_performance", action='store_true',
        help="Measure performance"
    )
    parser = pl.Trainer.add_argparse_args(parser)

    # Disable the initial validation pass
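
For reference, a hypothetical invocation enabling the new flag; the script name and the other argument values are assumptions, since this diff does not show them:

    python train.py \
        --output_dir runs/perf \
        --num_nodes 1 \
        --gpus 8 \
        --log_performance

With --log_performance set, global_batch_size is computed as num_nodes * gpus, which implicitly assumes one training sample per device per step; the callback then writes throughput plus mean/p90/p95/p99 step latency to a dllogger JSON stream at the path given by --output_dir.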