OpenDAS / FastFold · Commits

Commit ad1bbc52 (unverified): fix dap init in benchmark (#24)
Authored Jun 03, 2022 by shenggan; committed by GitHub on Jun 03, 2022
Parent commit: 259b6c87
Showing 2 changed files with 10 additions and 22 deletions:

    benchmark/perf.py               +0  -13
    fastfold/distributed/core.py   +10   -9
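Before this commit, benchmark/perf.py set up torch.distributed itself and then called init_dap, which refuses to run once a process group already exists, so the benchmark aborted immediately. A minimal sketch of that conflict, assuming init_dap is importable from fastfold.distributed and the script is launched under torchrun so the env:// init can resolve (both assumptions, not part of the diff):

    # Sketch of the failure mode this commit removes; the error text and
    # exit(-1) behavior come from core.py below, everything else is assumed.
    import torch
    from fastfold.distributed import init_dap   # import path assumed

    torch.distributed.init_process_group(backend='nccl', init_method='env://')

    # init_dap() sees torch.distributed.is_initialized() == True, logs
    # "use fastfold.distributed.init_dap instead of torch.distributed.init_process_group!"
    # and calls exit(-1), so the benchmark never reaches the model code.
    init_dap(2)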
benchmark/perf.py

@@ -35,19 +35,6 @@ def main():
     args = parser.parse_args()
 
-    args.distributed = False
-    if 'WORLD_SIZE' in os.environ:
-        args.distributed = int(os.environ['WORLD_SIZE']) > 1
-
-    args.local_rank = int(os.environ['LOCAL_RANK'])
-    torch.cuda.set_device(args.local_rank)
-    torch.distributed.init_process_group(backend='nccl', init_method='env://')
-    args.world_size = torch.distributed.get_world_size()
-    args.global_rank = torch.distributed.get_rank()
-    print('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' %
-          (args.global_rank, args.world_size))
-
     init_dap(args.dap_size)
 
     precision = torch.bfloat16
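With the hand-rolled setup gone, init_dap(args.dap_size) is the only distributed initialization left in the benchmark; a multi-GPU run presumably relies on torchrun to populate RANK, WORLD_SIZE, LOCAL_RANK, MASTER_ADDR and MASTER_PORT, which colossalai.launch_from_torch then consumes inside init_dap. A minimal sketch of that flow; the launch command, import path and argument handling are assumptions, not part of the diff:

    # Launched e.g. as:  torchrun --nproc_per_node=2 benchmark/perf.py --dap_size 2
    import argparse
    from fastfold.distributed import init_dap   # import path assumed

    def main():
        parser = argparse.ArgumentParser()
        parser.add_argument('--dap_size', type=int, default=1)
        args = parser.parse_args()

        # Single entry point: reads the torchrun environment (or the
        # single-device defaults from core.py below) and calls
        # colossalai.launch_from_torch.
        init_dap(args.dap_size)

    if __name__ == '__main__':
        main()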
fastfold/distributed/core.py

@@ -8,8 +8,10 @@ def ensure_divisibility(numerator, denominator):
     """Ensure that numerator is divisible by the denominator."""
     assert numerator % denominator == 0, '{} is not divisible by {}'.format(numerator, denominator)
 
 
-def set_missing_distributed_environ(key, value):
-    if key not in os.environ:
-        os.environ[str(key)] = str(value)
+def set_distributed_environ(key, value):
+    os.environ[str(key)] = str(value)
 
 
 def init_dap(tensor_model_parallel_size_=None):

@@ -21,19 +23,18 @@ def init_dap(tensor_model_parallel_size_=None):
     else:
         tensor_model_parallel_size_ = 1
 
-    if torch.torch.distributed.is_initialized():
+    if torch.distributed.is_initialized():
         _logger = colossalai.logging.get_dist_logger()
         _logger.error(
             "use fastfold.distributed.init_dap instead of torch.distributed.init_process_group!")
         exit(-1)
 
     # set distributed environ for single device launch
-    set_missing_distributed_environ('WORLD_SIZE', 1)
-    set_missing_distributed_environ('RANK', 0)
-    set_missing_distributed_environ('LOCAL_RANK', 0)
-    set_missing_distributed_environ('MASTER_ADDR', "localhost")
-    set_missing_distributed_environ('MASTER_PORT', -1)
+    if 'RANK' not in os.environ:
+        set_distributed_environ('WORLD_SIZE', 1)
+        set_distributed_environ('RANK', 0)
+        set_distributed_environ('LOCAL_RANK', 0)
+        set_distributed_environ('MASTER_ADDR', "localhost")
+        set_distributed_environ('MASTER_PORT', 10045)
 
     colossalai.launch_from_torch(
         config={"parallel": dict(tensor=dict(size=tensor_model_parallel_size_))})
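The net effect of the core.py changes is that a plain single-process run, with no torchrun and therefore no RANK in the environment, now gets a complete and usable set of defaults (in particular a real MASTER_PORT instead of the old -1 placeholder), while a torchrun launch keeps its own values because of the 'RANK' check. A small sketch of the single-device case, assuming init_dap is exported from fastfold.distributed and that colossalai initializes a one-process group from these defaults:

    # Run as:  python single_device_check.py   (no torchrun; file name hypothetical)
    import torch
    from fastfold.distributed import init_dap   # import path assumed

    init_dap()   # falls back to WORLD_SIZE=1, RANK=0, LOCAL_RANK=0,
                 # MASTER_ADDR=localhost, MASTER_PORT=10045 from core.py above

    assert torch.distributed.is_initialized()
    assert torch.distributed.get_world_size() == 1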