perf callback

35449bf1 · Marta · 5a004f04 · 35449bf1 · 35449bf1
Commit 35449bf1 authored Oct 29, 2021 by Marta
Hide whitespace changes
Inline Side-by-side

Showing with 95 additions and 0 deletions

openfold/utils/logger.py openfold/utils/logger.py +82 -0

train_openfold.py train_openfold.py +13 -0

No files found.
--- a/openfold/utils/logger.py
+++ b/openfold/utils/logger.py
+# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import operator
+import os
+import time
+
+import dllogger as logger
+import numpy as np
+import torch.cuda.profiler as profiler
+from dllogger import JSONStreamBackend, StdOutBackend, Verbosity
+from pytorch_lightning import Callback
+def is_main_process() -> bool:
+    """Return True when this process is the one that should emit logs.
+
+    The decision is based solely on the ``LOCAL_RANK`` environment variable:
+    the process counts as "main" when the variable is unset or parses to 0.
+
+    Raises:
+        ValueError: if ``LOCAL_RANK`` is set to something ``int`` cannot parse.
+    """
+    return int(os.getenv("LOCAL_RANK", "0")) == 0
+class PerformanceLoggingCallback(Callback):
+    """Lightning callback that timestamps batches and reports throughput/latency.
+
+    A timestamp is taken at the start of each train/test batch, but only while
+    ``trainer.current_epoch == 1`` and only once more than ``warmup_steps``
+    batches have been counted. Statistics are computed from the gaps between
+    consecutive timestamps and sent to dllogger at the end of every epoch and
+    at the end of training, on the process for which ``is_main_process()``
+    is true.
+
+    Args:
+        log_dir: Destination handed to ``JSONStreamBackend``. If it names an
+            existing directory, a file inside that directory is used instead.
+        global_batch_size: Numerator of the reported throughput.
+        warmup_steps: Number of initially counted batches excluded from timing.
+        profile: When true, start the CUDA profiler once warm-up is over and
+            stop it in ``on_train_end``.
+    """
+
+    # File name used when ``log_dir`` points at an existing directory.
+    _DEFAULT_LOG_NAME = "performance_log.json"
+
+    def __init__(self, log_dir, global_batch_size, warmup_steps: int = 0, profile: bool = False):
+        # NOTE(review): dllogger's JSONStreamBackend appears to open its second
+        # argument as a file, so a directory path would fail there - confirm.
+        # Redirect directory paths to a file inside them; file paths pass through.
+        json_target = log_dir
+        if os.path.isdir(json_target):
+            json_target = os.path.join(json_target, self._DEFAULT_LOG_NAME)
+        logger.init(
+            backends=[
+                JSONStreamBackend(Verbosity.VERBOSE, json_target),
+                StdOutBackend(Verbosity.VERBOSE),
+            ]
+        )
+        self.warmup_steps = warmup_steps
+        self.global_batch_size = global_batch_size
+        self.step = 0
+        self.profile = profile
+        self.timestamps = []
+
+    def do_step(self):
+        """Count one batch, start profiling when warm-up ends, and timestamp it."""
+        self.step += 1
+        # ``step`` is at least 1 at this point, so compare against at least 1;
+        # a plain ``== warmup_steps`` never fires for the default warmup of 0.
+        if self.profile and self.step == max(self.warmup_steps, 1):
+            profiler.start()
+        if self.step > self.warmup_steps:
+            self.timestamps.append(time.time())
+
+    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
+        """Record a step for training batches of epoch index 1 only."""
+        if trainer.current_epoch == 1:
+            self.do_step()
+
+    def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
+        """Record a step for test batches while the trainer reports epoch index 1."""
+        if trainer.current_epoch == 1:
+            self.do_step()
+
+    def process_performance_stats(self, deltas):
+        """Summarise per-step durations as throughput and latency figures.
+
+        Args:
+            deltas: Durations between consecutive batch starts, in seconds
+                (differences of ``time.time()`` values). Any array-like works.
+
+        Returns:
+            dict with ``throughput`` (``global_batch_size`` divided by the mean
+            duration), ``latency_mean`` and ``latency_90/95/99`` percentiles
+            in milliseconds, each rounded to three decimals.
+        """
+        deltas = np.asarray(deltas, dtype=float)
+        latencies_ms = 1000 * deltas
+        stats = {
+            "throughput": round(self.global_batch_size / np.mean(deltas), 3),
+            "latency_mean": round(latencies_ms.mean(), 3),
+        }
+        for level in (90, 95, 99):
+            stats[f"latency_{level}"] = round(np.percentile(latencies_ms, level), 3)
+        return stats
+
+    def _log(self):
+        """Send aggregate statistics to dllogger from the main process."""
+        if not is_main_process():
+            return
+        # One interval needs two timestamps. With fewer there is nothing to
+        # summarise (for instance at the end of an epoch that was not timed),
+        # and numpy would otherwise produce NaNs or fail on the empty array.
+        if len(self.timestamps) < 2:
+            return
+        deltas = np.diff(np.asarray(self.timestamps, dtype=float))
+        logger.log(step=(), data=self.process_performance_stats(deltas))
+        logger.flush()
+
+    def on_train_end(self, trainer, pl_module):
+        """Stop the profiler if profiling was requested, then log statistics."""
+        if self.profile:
+            profiler.stop()
+        self._log()
+
+    def on_epoch_end(self, trainer, pl_module):
+        """Log the statistics gathered so far at the end of each epoch."""
+        self._log()
--- a/train_openfold.py
+++ b/train_openfold.py
@@ -34,6 +34,8 @@ from scripts.zero_to_fp32 import (
    get_fp32_state_dict_from_zero_checkpoint
 )
+from openfold.utils.logger import PerformanceLoggingCallback
 class OpenFoldWrapper(pl.LightningModule):
    def __init__(self, config):
@@ -147,6 +149,13 @@ def main(args):
            strict=True,
        )
        callbacks.append(es)
+    if args.log_performance:
+        global_batch_size = args.num_nodes * args.gpus
+        perf = PerformanceLoggingCallback(
+            log_dir=args.output_dir,
+            global_batch_size=global_batch_size,
+        )
+        callbacks.append(perf)
    if(args.deepspeed_config_path is not None):
        strategy = DeepSpeedPlugin(config=args.deepspeed_config_path)
@@ -271,6 +280,10 @@ if __name__ == "__main__":
        "--resume_model_weights_only", type=bool, default=False,
        help="Whether to load just model weights as opposed to training state"
    )
+    parser.add_argument(
+        "--log_performance", action='store_true',
+        help="Measure performance"
+    )
    parser = pl.Trainer.add_argparse_args(parser)
    # Disable the initial validation pass