OpenDAS / ColossalAI · Commits

Commit 1d7ca023 authored Dec 29, 2022 by oahzxl

add benchmark

parent fff493c2
Showing 2 changed files with 89 additions and 6 deletions (+89 -6)

  autochunk_benchmark.py   +79  -0
  chunk_codegen.py         +10  -6
autochunk_benchmark.py  (new file, 0 → 100644)
import copy
import torch
import torch.nn.functional as F
import pytest
import torch.fx
import torch.multiprocessing as mp
from torch.fx import GraphModule
from colossalai.fx import ColoTracer
import colossalai
from colossalai.utils import free_port
from colossalai.core import global_context as gpc
from colossalai.fx.graph_module import ColoGraphModule
from colossalai.fx.passes.meta_info_prop import MetaInfoProp, TensorMetadata
from colossalai.fx.profiler import MetaTensor
from evoformer.evoformer import evoformer_base
from chunk_codegen import ChunkCodeGen
import time
def _benchmark_evoformer(model: torch.nn.Module, node, pair):
    loop = 10
    with torch.no_grad():
        # warmup runs so CUDA initialization does not skew the timing
        for _ in range(loop // 4):
            model(node, pair)
        torch.cuda.synchronize()
        time1 = time.time()
        # timed runs: average wall-clock time over `loop` forward passes
        for _ in range(loop):
            model(node, pair)
        torch.cuda.synchronize()
        time2 = time.time()
    return (time2 - time1) / loop
def benchmark_evoformer():
    # data
    msa_len = 300
    pair_len = 800
    node = torch.randn(1, msa_len, pair_len, 256).cuda()
    pair = torch.randn(1, pair_len, pair_len, 128).cuda()

    # build gm model
    max_memory = 3000  # MB
    model = evoformer_base().cuda()

    # trace the module and replace codegen
    graph = ColoTracer().trace(
        model,
        meta_args={
            "node": node.to(torch.device("meta")),
            "pair": pair.to(torch.device("meta")),
        },
    )
    gm_prop = torch.fx.symbolic_trace(model)  # must use symbolic_trace
    interp = MetaInfoProp(gm_prop)
    interp.propagate(MetaTensor(node, fake_device="cuda:0"), MetaTensor(pair, fake_device="cuda:0"))

    # now run it twice to get meta info in graph module, not necessary
    gm = torch.fx.GraphModule(model, graph)
    interp = MetaInfoProp(gm)
    interp.propagate(MetaTensor(node, fake_device="cuda:0"), MetaTensor(pair, fake_device="cuda:0"))

    # set code_gen
    codegen = ChunkCodeGen(gm_prop, max_memory)
    graph.set_codegen(codegen)
    gm = ColoGraphModule(model, graph)
    gm.recompile()

    # print
    code = graph.python_code("self").src
    print(code)

    time_gm = _benchmark_evoformer(gm, node, pair)
    print("gm %.4fs" % time_gm)
    time_openfold = _benchmark_evoformer(model, node, pair)
    print("openfold %.4fs" % time_openfold)


if __name__ == "__main__":
    benchmark_evoformer()
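The script above reports wall-clock time only. Before comparing the two timings, one might also check that the chunked GraphModule stays numerically close to the eager model. A minimal sketch, not part of this commit, assuming `gm`, `model`, `node`, and `pair` are bound as in the script (the Evoformer's output type is not shown here, so both tensor and tuple outputs are handled; tolerances are illustrative):

# Sketch: numerical check between the chunked gm and the eager model.
with torch.no_grad():
    out_gm = gm(node, pair)
    out_ref = model(node, pair)
if not isinstance(out_ref, (tuple, list)):
    out_gm, out_ref = (out_gm,), (out_ref,)
for a, b in zip(out_gm, out_ref):
    torch.testing.assert_close(a, b, rtol=1e-3, atol=1e-3)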
chunk_codegen.py
@@ -1398,13 +1398,14 @@ class MemoryEstimator(object):
 class ChunkSelector(object):
-    def __init__(self, index_tracer: IndexTracer, memory_estimator: MemoryEstimator, stratge):
+    def __init__(self, index_tracer: IndexTracer, memory_estimator: MemoryEstimator, stratge, max_memory=None):
         self.index_tracer = index_tracer
         self.memory_estimator = memory_estimator
         assert stratge in ["min_memory", "fit_memory"]
+        assert (stratge == "fit_memory" and max_memory is not None) or stratge != "fit_memory"
         self.stratge = stratge
-        self.max_memory = 600  # MB
+        self.max_memory = max_memory  # MB
 
     def _select_best_chunk_region(self, possible_chunk_regions, chunk_infos, peak_node, max_chunk_region, mem_peak):
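The new assert encodes a contract: a "fit_memory" selector must be given an explicit budget, while "min_memory" needs none (the hard-coded 600 MB default is gone). The two valid call shapes, sketched with illustrative variable names (`stratge` is the code's own spelling):

# Valid: fit a caller-supplied budget in MB -- max_memory is now required here.
selector = ChunkSelector(tracer, estimator, stratge="fit_memory", max_memory=2000)
# Also valid: minimize memory, no budget needed.
selector = ChunkSelector(tracer, estimator, stratge="min_memory")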
@@ -1556,13 +1557,13 @@ class ChunkSelector(object):
 class ChunkRegionSearch(object):
-    def __init__(self, gm) -> None:
+    def __init__(self, gm, max_memory=None) -> None:
         self.gm = gm
         self.index_tracer = IndexTracer(list(gm.graph.nodes))
         self.index_tracer.trace_index()
         self.memory_estimator = MemoryEstimator(self.index_tracer)
         self.chunk_selector = ChunkSelector(
-            self.index_tracer, self.memory_estimator, stratge="fit_memory"
+            self.index_tracer, self.memory_estimator, stratge="fit_memory", max_memory=max_memory
         )
 
     def _find_peak_node(self, mem_peak):
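`ChunkRegionSearch` simply forwards the budget to its selector, so a caller configures one number. A sketch of the entry point (the `search_region` call and the "region" key both appear in the hunks below; `meta_gm` is illustrative):

# Sketch: one budget configures the whole chunk-region search.
search = ChunkRegionSearch(meta_gm, max_memory=3000)  # MB
chunk_search = search.search_region()
chunk_regions = [i["region"] for i in chunk_search]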
@@ -1897,6 +1898,7 @@ def emit_code_with_chunk(
     delete_unused_value_func,
     meta_nodes,
     meta_graph,
+    max_memory=None,
 ):
     """Emit code with nested activation checkpoint
     When we detect some of the node.activation_checkpoint is a List, we will use
@@ -1912,7 +1914,7 @@ def emit_code_with_chunk(
     node_list = list(nodes)
 
     # find the chunk regions
-    chunk_region_search = ChunkRegionSearch(meta_graph)
+    chunk_region_search = ChunkRegionSearch(meta_graph, max_memory)
     chunk_search = chunk_region_search.search_region()
     chunk_regions = [i["region"] for i in chunk_search]
@@ -1989,9 +1991,10 @@ def emit_code_with_chunk(
 if CODEGEN_AVAILABLE:
 
     class ChunkCodeGen(CodeGen):
-        def __init__(self, meta_graph):
+        def __init__(self, meta_graph, max_memory=None):
             super().__init__()
             self.meta_graph = meta_graph
+            self.max_memory = max_memory
             self.meta_node = list(meta_graph.graph.nodes)
 
         def _gen_python_code(
@@ -2230,6 +2233,7 @@ if CODEGEN_AVAILABLE:
                 delete_unused_values,
                 self.meta_node,
                 self.meta_graph,
+                self.max_memory
             )
             if len(body) == 0:
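Taken together, the diff threads a single `max_memory` value from the public codegen entry point down to the selector, which is what lets the benchmark above pass its 3000 MB budget in one place. Sketched end to end (the comments trace the call chain shown in these hunks):

# The budget's path through this commit:
codegen = ChunkCodeGen(gm_prop, max_memory=3000)  # caller sets the MB budget
# ChunkCodeGen._gen_python_code
#   -> emit_code_with_chunk(..., max_memory=self.max_memory)
#     -> ChunkRegionSearch(meta_graph, max_memory)
#       -> ChunkSelector(..., stratge="fit_memory", max_memory=max_memory)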