Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
7c70bfbe
Unverified
Commit
7c70bfbe
authored
Jul 14, 2022
by
ver217
Committed by
GitHub
Jul 14, 2022
Browse files
[hotfix] fix PipelineSharedModuleGradientHandler (#1314)
parent
85f933b5
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
2 deletions
+7
-2
colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py
...e/gradient_handler/_pipeline_parallel_gradient_handler.py
+7
-2
No files found.
colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py
View file @
7c70bfbe
...
@@ -33,14 +33,19 @@ class PipelineSharedModuleGradientHandler(BaseGradientHandler):
...
@@ -33,14 +33,19 @@ class PipelineSharedModuleGradientHandler(BaseGradientHandler):
# Pack the buckets.
# Pack the buckets.
for
param
in
self
.
_model
.
parameters
():
for
param
in
self
.
_model
.
parameters
():
group
=
getattr
(
param
,
'pipeline_shared_module_pg'
,
None
)
group
=
getattr
(
param
,
'pipeline_shared_module_pg'
,
None
)
if
param
.
requires_grad
and
param
.
grad
is
not
None
and
group
is
not
None
:
if
param
.
requires_grad
and
group
is
not
None
and
(
(
hasattr
(
param
,
'colo_attr'
)
and
not
param
.
colo_attr
.
saved_grad
.
is_null
())
or
param
.
grad
is
not
None
):
tp
=
param
.
data
.
type
()
tp
=
param
.
data
.
type
()
buckets
[
group
][
tp
].
append
(
param
)
buckets
[
group
][
tp
].
append
(
param
)
# For each bucket, all-reduce and copy all-reduced grads.
# For each bucket, all-reduce and copy all-reduced grads.
for
group
,
group_buckets
in
buckets
.
items
():
for
group
,
group_buckets
in
buckets
.
items
():
for
tp
,
bucket
in
group_buckets
.
items
():
for
tp
,
bucket
in
group_buckets
.
items
():
grads
=
[
param
.
grad
.
data
for
param
in
bucket
]
grads
=
[
param
.
colo_attr
.
grad_payload
if
hasattr
(
param
,
'colo_attr'
)
else
param
.
grad
.
data
for
param
in
bucket
]
coalesced
=
_flatten_dense_tensors
(
grads
).
to
(
torch
.
cuda
.
current_device
())
coalesced
=
_flatten_dense_tensors
(
grads
).
to
(
torch
.
cuda
.
current_device
())
dist
.
all_reduce
(
coalesced
,
op
=
dist
.
ReduceOp
.
SUM
,
group
=
group
)
dist
.
all_reduce
(
coalesced
,
op
=
dist
.
ReduceOp
.
SUM
,
group
=
group
)
for
buf
,
synced
in
zip
(
grads
,
_unflatten_dense_tensors
(
coalesced
,
grads
)):
for
buf
,
synced
in
zip
(
grads
,
_unflatten_dense_tensors
(
coalesced
,
grads
)):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment