Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
apex
Commits
3c02784b
Commit
3c02784b
authored
May 30, 2020
by
Thor Johnsen
Browse files
Bug fix
parent
9773218c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
7 deletions
+7
-7
apex/contrib/optimizers/distributed_fused_lamb.py
apex/contrib/optimizers/distributed_fused_lamb.py
+7
-7
No files found.
apex/contrib/optimizers/distributed_fused_lamb.py
View file @
3c02784b
...
@@ -424,20 +424,20 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
...
@@ -424,20 +424,20 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
def
__compute_contrib_param_norm
(
self
):
def
__compute_contrib_param_norm
(
self
):
if
self
.
_contrib_model_param_for_norm_fp16
is
not
None
and
self
.
_contrib_model_param_for_norm_fp32
is
not
None
:
if
self
.
_contrib_model_param_for_norm_fp16
is
not
None
and
self
.
_contrib_model_param_for_norm_fp32
is
not
None
:
gnorm_fp16
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_
dummy_
overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp16
],
True
)
gnorm_fp16
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp16
],
True
)
gnorm_fp32
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_
dummy_
overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp32
],
True
)
gnorm_fp32
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp32
],
True
)
gnorm
=
torch
.
empty
(
size
=
[
self
.
_contrib_model_param_for_norm_num
],
dtype
=
torch
.
bool
,
device
=
'cuda'
)
gnorm
=
torch
.
empty
(
size
=
[
self
.
_contrib_model_param_for_norm_num
],
dtype
=
torch
.
bool
,
device
=
'cuda'
)
gnorm
.
masked_scatter
(
self
.
_contrib_model_param_for_norm_is_fp16
,
gnorm_fp16
)
gnorm
.
masked_scatter
(
self
.
_contrib_model_param_for_norm_is_fp16
,
gnorm_fp16
)
gnorm
.
masked_scatter
(
self
.
_contrib_model_param_for_norm_is_fp32
,
gnorm_fp32
)
gnorm
.
masked_scatter
(
self
.
_contrib_model_param_for_norm_is_fp32
,
gnorm_fp32
)
elif
self
.
_contrib_model_param_for_norm_fp16
is
not
None
:
elif
self
.
_contrib_model_param_for_norm_fp16
is
not
None
:
gnorm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_
dummy_
overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp16
],
True
)
gnorm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp16
],
True
)
elif
self
.
_contrib_model_param_for_norm_fp32
is
not
None
:
elif
self
.
_contrib_model_param_for_norm_fp32
is
not
None
:
gnorm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_
dummy_
overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp32
],
True
)
gnorm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_overflow_buf
,
[
self
.
_contrib_model_param_for_norm_fp32
],
True
)
return
gnorm
return
gnorm
def
__compute_contrib_update_norm
(
self
):
def
__compute_contrib_update_norm
(
self
):
l2_norm
=
torch
.
zeros
(
size
=
[
self
.
_model_params_num
],
dtype
=
torch
.
float32
,
device
=
'cuda'
)
l2_norm
=
torch
.
zeros
(
size
=
[
self
.
_model_params_num
],
dtype
=
torch
.
float32
,
device
=
'cuda'
)
local_contrib_l2_norm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_
dummy_
overflow_buf
,
[
self
.
_contrib_update_frag_for_norm
],
True
)
**
2
local_contrib_l2_norm
=
multi_tensor_applier
(
self
.
multi_tensor_l2norm
,
self
.
_overflow_buf
,
[
self
.
_contrib_update_frag_for_norm
],
True
)
**
2
contrib_l2_norm
=
l2_norm
[
self
.
_contrib_min_param_i
:
self
.
_contrib_max_param_i
+
1
]
contrib_l2_norm
=
l2_norm
[
self
.
_contrib_min_param_i
:
self
.
_contrib_max_param_i
+
1
]
contrib_l2_norm
.
copy_
(
local_contrib_l2_norm
)
contrib_l2_norm
.
copy_
(
local_contrib_l2_norm
)
torch
.
distributed
.
allreduce
(
l2_norm
,
group
=
self
.
_ag_pg
[
0
])
torch
.
distributed
.
allreduce
(
l2_norm
,
group
=
self
.
_ag_pg
[
0
])
...
@@ -453,7 +453,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
...
@@ -453,7 +453,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
param_norm
=
self
.
__compute_contrib_param_norm
()
param_norm
=
self
.
__compute_contrib_param_norm
()
max_grad_norm
=
self
.
defaults
[
'max_grad_norm'
]
max_grad_norm
=
self
.
defaults
[
'max_grad_norm'
]
multi_tensor_applier
(
self
.
multi_tensor_lamb_compute_update_term
,
multi_tensor_applier
(
self
.
multi_tensor_lamb_compute_update_term
,
self
.
_
dummy_
overflow_buf
,
self
.
_overflow_buf
,
self
.
_contrib_compute_update_term_tensor_list
,
# g, p, m, v, u
self
.
_contrib_compute_update_term_tensor_list
,
# g, p, m, v, u
self
.
_contrib_beta1
,
self
.
_contrib_beta1
,
self
.
_contrib_beta2
,
self
.
_contrib_beta2
,
...
@@ -467,7 +467,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
...
@@ -467,7 +467,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
max_grad_norm
)
max_grad_norm
)
upd_norm
=
self
.
__compute_contrib_update_norm
()
upd_norm
=
self
.
__compute_contrib_update_norm
()
multi_tensor_applier
(
self
.
multi_tensor_lamb_update_weights
,
multi_tensor_applier
(
self
.
multi_tensor_lamb_update_weights
,
self
.
_
dummy_
overflow_buf
,
self
.
_overflow_buf
,
self
.
_contrib_update_weights_tensor_list
,
# u, p, p_copy
self
.
_contrib_update_weights_tensor_list
,
# u, p, p_copy
param_norm
,
param_norm
,
upd_norm
,
upd_norm
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment