Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
4b40fbd7
Unverified
Commit
4b40fbd7
authored
Dec 04, 2022
by
Boyuan Yao
Committed by
GitHub
Dec 04, 2022
Browse files
[autoparallel] fix forward memory calculation (#2062)
parent
44ea4618
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
30 additions
and
25 deletions
+30
-25
colossalai/auto_parallel/meta_profiler/meta_registry/activation.py
...i/auto_parallel/meta_profiler/meta_registry/activation.py
+6
-4
colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
+13
-13
colossalai/auto_parallel/meta_profiler/meta_registry/linear.py
...salai/auto_parallel/meta_profiler/meta_registry/linear.py
+7
-6
colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
+2
-1
colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py
...alai/auto_parallel/meta_profiler/meta_registry/pooling.py
+2
-1
No files found.
colossalai/auto_parallel/meta_profiler/meta_registry/activation.py
View file @
4b40fbd7
...
...
@@ -49,10 +49,12 @@ def relu_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, Lis
# calculate memory cost
# NOTE: the inplace ReLU don't have forward memory cost
fwd_memory_cost
=
MemoryCost
(
activation
=
0
if
inplace
else
activation_size
(
output_tensor
),
parameter
=
0
,
temp
=
0
,
buffer
=
0
)
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
)
if
inplace
else
activation_size
([
output_tensor
,
input_tensor
]),
parameter
=
0
,
temp
=
0
,
buffer
=
0
)
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
),
parameter
=
0
,
temp
=
0
,
buffer
=
0
)
...
...
colossalai/auto_parallel/meta_profiler/meta_registry/conv.py
View file @
4b40fbd7
...
...
@@ -96,19 +96,19 @@ def convnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, L
# calculate memory cost
# TODO: use profiler to check conv temp memory
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
output_tensor
),
parameter
=
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
)
if
has_bias
else
activation_size
(
weight_tensor
),
temp
=
0
,
buffer
=
0
)
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
)
+
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
)
if
has_bias
else
activation_size
(
input_tensor
)
+
activation_size
(
weight_tensor
),
parameter
=
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
)
if
has_bias
else
activation_size
(
weight_tensor
),
temp
=
0
,
buffer
=
0
)
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
output_tensor
]
),
parameter
=
activation_size
([
weight_tensor
,
bias_tensor
])
if
has_bias
else
activation_size
(
weight_tensor
)
,
temp
=
0
,
buffer
=
0
)
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
weight_tensor
,
bias_tensor
])
if
has_bias
else
activation_size
(
[
input_tensor
,
weight_tensor
]),
parameter
=
activation_size
([
weight_tensor
,
bias_tensor
]
)
if
has_bias
else
activation_size
(
weight_tensor
),
temp
=
0
,
buffer
=
0
)
# total cost is the sum of forward and backward cost
total_cost
=
MemoryCost
(
activation
=
fwd_memory_cost
.
activation
+
bwd_memory_cost
.
activation
,
...
...
colossalai/auto_parallel/meta_profiler/meta_registry/linear.py
View file @
4b40fbd7
...
...
@@ -106,15 +106,15 @@ def linear_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, L
# calculate memory cost
# NOTE: Linear don't have buffer and temp in forward and backward phase
# the forward activation cost is the size of output_tensor, parameter cost is the size of weight_tensor and bias_tensor
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
output_tensor
),
parameter
=
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
),
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
output_tensor
]),
parameter
=
activation_size
([
weight_tensor
,
bias_tensor
]),
temp
=
0
,
buffer
=
0
)
# the backward activation cost is the size of input_tensor, weight_tensor and bias_tensor, parameter cost is 0
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
)
+
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
),
parameter
=
activation_size
(
weight_tensor
)
+
activation_size
(
bias_tensor
),
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
weight_tensor
,
bias_tensor
]),
parameter
=
activation_size
([
weight_tensor
,
bias_tensor
]),
temp
=
0
,
buffer
=
0
)
...
...
@@ -142,13 +142,14 @@ def linear_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem, L
# calculate memory cost
# NOTE: Linear don't have buffer and temp in forward and backward phase
# the forward activation cost is the size of output_tensor, parameter cost is the size of weight_tensor
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
output_tensor
),
parameter
=
activation_size
(
weight_tensor
),
temp
=
0
,
buffer
=
0
)
# the backward activation cost is the size of input_tensor and weight_tensor, parameter cost is 0
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
)
+
activation_size
(
weight_tensor
),
bwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
(
[
input_tensor
,
weight_tensor
]
),
parameter
=
activation_size
(
weight_tensor
),
temp
=
0
,
buffer
=
0
)
...
...
colossalai/auto_parallel/meta_profiler/meta_registry/norm.py
View file @
4b40fbd7
...
...
@@ -76,7 +76,8 @@ def batchnormnd_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleIt
# calculate memory cost
# the fwd activation cost is output plus saved mean and saved inv std
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
output_tensor
,
mean_tensor
,
var_tensor
]),
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_memory_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
output_tensor
,
mean_tensor
,
var_tensor
]),
parameter
=
activation_size
([
weight_tensor
,
bias_tensor
]),
temp
=
0
,
buffer
=
activation_size
([
mean_tensor
,
var_tensor
]))
...
...
colossalai/auto_parallel/meta_profiler/meta_registry/pooling.py
View file @
4b40fbd7
...
...
@@ -110,7 +110,8 @@ def maxpool_meta_info(*args, **kwargs) -> Tuple[TrainCycleItem, TrainCycleItem,
# calculate memory cost
# NOTE: the index matrix will be discarded in backward phase
fwd_mem_cost
=
MemoryCost
(
activation
=
activation_size
(
output_tensor
)
+
activation_size
(
index_matrix
))
# NOTE: currently in SPMD solver we always believe that there will be a new tensor created in forward
fwd_mem_cost
=
MemoryCost
(
activation
=
activation_size
([
input_tensor
,
output_tensor
,
index_matrix
]))
# temp memory for backward is the index matrix to be discarded
bwd_mem_cost
=
MemoryCost
(
activation
=
activation_size
(
input_tensor
)
-
activation_size
(
index_matrix
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment