OpenDAS / ColossalAI / Commits

Commit e99af94a authored Mar 18, 2022 by ver217

rename variables

parent 46add4a5
Changes: 1

Showing 1 changed file with 6 additions and 6 deletions

colossalai/zero/sharded_optim/sharded_optim_v2.py  +6 -6
@@ -135,18 +135,18 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
         # assign master param pointers to p.data.
         # We will not trigger data copy here.
-        for group in self.optimizer.param_groups:
+        for group in self.optim.param_groups:
             for p in group['params']:
                 p.data = self.master_params[p]
                 # Now p.data is sharded
                 # So optimizer states are sharded naturally

-        ret = self.optimizer.step(*args, **kwargs)
+        ret = self.optim.step(*args, **kwargs)

         # Copy master param data (fp32) to payload of col_attr (fp16)
         # TODO() improve efficiency by gathering tensors into a chunk and transfering
         # a chunk.
-        for group in self.optimizer.param_groups:
+        for group in self.optim.param_groups:
             for p in group['params']:
                 is_param_sharded = p.col_attr.data.is_sharded
                 if not is_param_sharded:
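The hunk above renames self.optimizer to self.optim inside ShardedOptimizerV2.step(), which follows the usual fp16/fp32 master-weight pattern: point each parameter at its fp32 master copy, run the wrapped optimizer, then copy the updated values back into the fp16 storage. A minimal sketch of that pattern, independent of ColossalAI internals (the function and argument names below are illustrative, not part of the library):

def master_param_step(optim, master_params, *args, **kwargs):
    # Point each param at its fp32 master tensor. Only the reference is
    # swapped, no data is copied, so the wrapped optimizer updates fp32
    # values and keeps its states in fp32 as well.
    fp16_payload = {}
    for group in optim.param_groups:
        for p in group['params']:
            fp16_payload[p] = p.data
            p.data = master_params[p]

    ret = optim.step(*args, **kwargs)

    # Copy the updated fp32 masters back into the original fp16 storage
    # (copy_ performs the fp32 -> fp16 cast), then restore p.data.
    for group in optim.param_groups:
        for p in group['params']:
            fp16_payload[p].copy_(p.data)
            p.data = fp16_payload[p]
    return ret

In the real class the copy-back goes into p.col_attr's fp16 payload instead, as the "Copy master param data (fp32) to payload of col_attr (fp16)" comment in the hunk describes, because parameters may be sharded.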
@@ -190,7 +190,7 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
         self._found_overflow.fill_(0.0)

         # check for overflow
-        for group in self.optimizer.param_groups:
+        for group in self.optim.param_groups:
             for p in group['params']:
                 if has_inf_or_nan(p.grad):
                     self._found_overflow.fill_(1.0)
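This hunk touches the overflow check that guards the step under dynamic loss scaling: if any gradient contains an inf or NaN, the flag tensor is set and the update can be skipped. A rough sketch of the idea follows; has_inf_or_nan here is a stand-in written with torch.isfinite, and the all-reduce is an assumption about how ranks would agree on a shared flag, not something shown in this file.

import torch
import torch.distributed as dist

def has_inf_or_nan(t: torch.Tensor) -> bool:
    # A gradient with any non-finite entry poisons the whole update.
    return not torch.isfinite(t).all().item()

def found_overflow(optim, flag: torch.Tensor) -> bool:
    flag.fill_(0.0)
    for group in optim.param_groups:
        for p in group['params']:
            if p.grad is not None and has_inf_or_nan(p.grad):
                flag.fill_(1.0)
    # With sharded gradients each rank only sees its own shard, so the
    # flag is combined across ranks before deciding to skip the step.
    if dist.is_initialized():
        dist.all_reduce(flag, op=dist.ReduceOp.MAX)
    return bool(flag.item())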
@@ -206,7 +206,7 @@ class ShardedOptimizerV2(ColossalaiOptimizer):

     def _unscale_grads(self):
         assert self.optim_state == OptimState.SCALED
-        for group in self.optimizer.param_groups:
+        for group in self.optim.param_groups:
             for p in group['params']:
                 if p.grad is not None:
                     p.grad.data.div_(self.loss_scale)
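_unscale_grads undoes the loss scaling: backward was run on loss * loss_scale, so every gradient carries an extra factor of loss_scale and is divided in place before the update. A one-function sketch of the same arithmetic (names illustrative):

def unscale_grads(optim, loss_scale: float) -> None:
    # grad(loss * scale) == scale * grad(loss), so dividing in place
    # restores the true gradient before the optimizer step.
    for group in optim.param_groups:
        for p in group['params']:
            if p.grad is not None:
                p.grad.data.div_(loss_scale)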
@@ -216,7 +216,7 @@ class ShardedOptimizerV2(ColossalaiOptimizer):
         # We must set grad to None
         # Because we will judge whether local grad accumulation
         # is enabled by wheter grad is None
-        self.optimizer.zero_grad(set_to_none=True)
+        self.optim.zero_grad(set_to_none=True)

     def sync_grad(self):
         pass
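The zero_grad call passes set_to_none=True because, as the comment in the hunk explains, the wrapper later distinguishes a fresh step from local gradient accumulation by testing whether p.grad is None. A hypothetical hook illustrating that convention (accumulate_or_assign is not a ColossalAI function):

import torch

def accumulate_or_assign(p: torch.nn.Parameter, new_grad: torch.Tensor) -> None:
    if p.grad is None:
        # Grad was cleared with set_to_none=True: start a fresh gradient.
        p.grad = new_grad.clone()
    else:
        # Grad is still present: local gradient accumulation is in progress.
        p.grad.add_(new_grad)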