Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenFold
Commits
35449bf1
Commit
35449bf1
authored
Oct 29, 2021
by
Marta
Browse files
perf callback
parent
5a004f04
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
95 additions
and
0 deletions
+95
-0
openfold/utils/logger.py
openfold/utils/logger.py
+82
-0
train_openfold.py
train_openfold.py
+13
-0
No files found.
openfold/utils/logger.py
0 → 100644
View file @
35449bf1
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
operator
import
time
import
dllogger
as
logger
import
numpy
as
np
import
torch.cuda.profiler
as
profiler
from
dllogger
import
JSONStreamBackend
,
StdOutBackend
,
Verbosity
from
pytorch_lightning
import
Callback
def is_main_process():
    """Return True when this process is local rank 0.

    Reads the ``LOCAL_RANK`` environment variable, treating it as ``"0"``
    when unset, and compares its integer value with 0.

    Raises:
        ValueError: if ``LOCAL_RANK`` is set to something ``int()`` cannot parse.
    """
    # The module's import block does not import ``os``; import it here so the
    # lookup below does not fail with NameError.
    import os

    return int(os.getenv("LOCAL_RANK", "0")) == 0
class PerformanceLoggingCallback(Callback):
    """Callback that timestamps batches and reports throughput/latency via dllogger.

    A wall-clock timestamp is recorded at the start of every train/test batch
    while ``trainer.current_epoch == 1``, once more than ``warmup_steps``
    batches have been counted. The differences between consecutive timestamps
    are turned into throughput and latency statistics and written through
    dllogger (JSON stream + stdout backends) on the main process only.

    Args:
        log_dir: Forwarded unchanged as the second argument of
            ``JSONStreamBackend``.
            NOTE(review): dllogger appears to treat this value as the output
            file name rather than a directory -- confirm what callers pass.
        global_batch_size: Numerator of the throughput computation
            (``global_batch_size / mean step time``).
        warmup_steps: Number of counted batches to skip before timestamps
            are recorded.
        profile: When True, ``torch.cuda.profiler`` is started at the end of
            warm-up and stopped in ``on_train_end``.
    """

    def __init__(self, log_dir, global_batch_size, warmup_steps: int = 0, profile: bool = False):
        super().__init__()
        logger.init(
            backends=[
                JSONStreamBackend(Verbosity.VERBOSE, log_dir),
                StdOutBackend(Verbosity.VERBOSE),
            ]
        )
        self.warmup_steps = warmup_steps
        self.global_batch_size = global_batch_size
        self.step = 0
        self.profile = profile
        self.timestamps = []

    def do_step(self):
        """Count one batch; start the profiler / record a timestamp as configured."""
        self.step += 1
        # ``step`` is incremented before this check, so it is never 0 here.
        # Clamp the trigger so the default ``warmup_steps=0`` still starts the
        # profiler (on the first counted batch) instead of never starting it.
        if self.profile and self.step == max(self.warmup_steps, 1):
            profiler.start()
        if self.step > self.warmup_steps:
            self.timestamps.append(time.time())

    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
        """Count a training batch, but only while ``current_epoch`` is 1."""
        if trainer.current_epoch == 1:
            self.do_step()

    def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx):
        """Count a test batch, but only while ``current_epoch`` is 1."""
        if trainer.current_epoch == 1:
            self.do_step()

    def process_performance_stats(self, deltas):
        """Summarise per-step durations.

        Args:
            deltas: Durations between consecutive timestamps, in seconds
                (they come from ``time.time()`` differences). Any array-like
                is accepted.

        Returns:
            dict with ``throughput`` (``global_batch_size`` divided by the
            mean duration) and ``latency_mean`` / ``latency_90`` /
            ``latency_95`` / ``latency_99`` in milliseconds, each rounded to
            three decimals.
        """
        step_seconds = np.asarray(deltas, dtype=float)
        step_millis = 1000 * step_seconds

        stats = {
            "throughput": round(self.global_batch_size / np.mean(step_seconds), 3),
            "latency_mean": round(step_millis.mean(), 3),
        }
        for level in (90, 95, 99):
            stats[f"latency_{level}"] = round(np.percentile(step_millis, level), 3)
        return stats

    def _log(self):
        """Emit the current statistics through dllogger (main process only)."""
        if not is_main_process():
            return
        # ``on_epoch_end`` fires after every epoch, but timestamps are only
        # collected during epoch 1. With fewer than two timestamps there are
        # no intervals to summarise, so skip rather than log NaN statistics.
        if len(self.timestamps) < 2:
            return
        deltas = np.diff(self.timestamps)
        stats = self.process_performance_stats(deltas)
        logger.log(step=(), data=stats)
        logger.flush()

    def on_train_end(self, trainer, pl_module):
        """Stop the profiler (if profiling was requested) and log final stats."""
        if self.profile:
            profiler.stop()
        self._log()

    def on_epoch_end(self, trainer, pl_module):
        """Log the statistics gathered so far."""
        self._log()
train_openfold.py
View file @
35449bf1
...
@@ -34,6 +34,8 @@ from scripts.zero_to_fp32 import (
...
@@ -34,6 +34,8 @@ from scripts.zero_to_fp32 import (
get_fp32_state_dict_from_zero_checkpoint
get_fp32_state_dict_from_zero_checkpoint
)
)
from
openfold.utils.logger
import
PerformanceLoggingCallback
class
OpenFoldWrapper
(
pl
.
LightningModule
):
class
OpenFoldWrapper
(
pl
.
LightningModule
):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
...
@@ -147,6 +149,13 @@ def main(args):
...
@@ -147,6 +149,13 @@ def main(args):
strict
=
True
,
strict
=
True
,
)
)
callbacks
.
append
(
es
)
callbacks
.
append
(
es
)
if
args
.
log_performance
:
global_batch_size
=
args
.
num_nodes
*
args
.
gpus
perf
=
PerformanceLoggingCallback
(
log_dir
=
args
.
output_dir
,
global_batch_size
=
global_batch_size
,
)
callbacks
.
append
(
perf
)
if
(
args
.
deepspeed_config_path
is
not
None
):
if
(
args
.
deepspeed_config_path
is
not
None
):
strategy
=
DeepSpeedPlugin
(
config
=
args
.
deepspeed_config_path
)
strategy
=
DeepSpeedPlugin
(
config
=
args
.
deepspeed_config_path
)
...
@@ -271,6 +280,10 @@ if __name__ == "__main__":
...
@@ -271,6 +280,10 @@ if __name__ == "__main__":
"--resume_model_weights_only"
,
type
=
bool
,
default
=
False
,
"--resume_model_weights_only"
,
type
=
bool
,
default
=
False
,
help
=
"Whether to load just model weights as opposed to training state"
help
=
"Whether to load just model weights as opposed to training state"
)
)
parser
.
add_argument
(
"--log_performance"
,
action
=
'store_true'
,
help
=
"Measure performance"
)
parser
=
pl
.
Trainer
.
add_argparse_args
(
parser
)
parser
=
pl
.
Trainer
.
add_argparse_args
(
parser
)
# Disable the initial validation pass
# Disable the initial validation pass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment