Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Megatron-LM
Commits
203235fd
Commit
203235fd
authored
Jan 04, 2021
by
mohammad
Browse files
added backward compatibility
parent
43529f78
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
60 additions
and
1 deletion
+60
-1
megatron/checkpointing.py
megatron/checkpointing.py
+5
-1
megatron/fp16_deprecated/__init__.py
megatron/fp16_deprecated/__init__.py
+16
-0
megatron/fp16_deprecated/loss_scaler.py
megatron/fp16_deprecated/loss_scaler.py
+39
-0
No files found.
megatron/checkpointing.py
View file @
203235fd
...
@@ -205,12 +205,16 @@ def load_checkpoint(model, optimizer, lr_scheduler, load_arg='load'):
...
@@ -205,12 +205,16 @@ def load_checkpoint(model, optimizer, lr_scheduler, load_arg='load'):
try
:
try
:
state_dict
=
torch
.
load
(
checkpoint_name
,
map_location
=
'cpu'
)
state_dict
=
torch
.
load
(
checkpoint_name
,
map_location
=
'cpu'
)
except
ModuleNotFoundError
:
except
ModuleNotFoundError
:
from
megatron.fp16_deprecated
import
loss_scaler
# For backward compatibility.
# For backward compatibility.
print_rank_0
(
' > deserializing using the old code structure ...'
)
print_rank_0
(
' > deserializing using the old code structure ...'
)
sys
.
modules
[
'fp16.loss_scaler'
]
=
sys
.
modules
[
sys
.
modules
[
'fp16.loss_scaler'
]
=
sys
.
modules
[
'megatron.fp16.loss_scaler'
]
'megatron.fp16_deprecated.loss_scaler'
]
sys
.
modules
[
'megatron.fp16.loss_scaler'
]
=
sys
.
modules
[
'megatron.fp16_deprecated.loss_scaler'
]
state_dict
=
torch
.
load
(
checkpoint_name
,
map_location
=
'cpu'
)
state_dict
=
torch
.
load
(
checkpoint_name
,
map_location
=
'cpu'
)
sys
.
modules
.
pop
(
'fp16.loss_scaler'
,
None
)
sys
.
modules
.
pop
(
'fp16.loss_scaler'
,
None
)
sys
.
modules
.
pop
(
'megatron.fp16.loss_scaler'
,
None
)
except
BaseException
:
except
BaseException
:
print_rank_0
(
'could not load the checkpoint'
)
print_rank_0
(
'could not load the checkpoint'
)
sys
.
exit
()
sys
.
exit
()
...
...
megatron/fp16_deprecated/__init__.py
0 → 100644
View file @
203235fd
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.loss_scaler
import
*
megatron/fp16_deprecated/loss_scaler.py
0 → 100755
View file @
203235fd
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
torch
class LossScaler:
    """Legacy container for a fixed loss-scale value.

    Only the constructor is defined here; the class carries no scaling
    logic.  It lives in ``megatron.fp16_deprecated`` so that checkpoints
    pickled against the old ``fp16.loss_scaler`` /
    ``megatron.fp16.loss_scaler`` module paths can still be deserialized
    (the checkpoint loader aliases those paths to this module before
    retrying ``torch.load``).

    NOTE(review): keep the class name and attribute names unchanged --
    pickled objects refer to them by name.
    """

    def __init__(self, scale=1):
        """Store ``scale`` as the current loss scale.

        Args:
            scale: Value recorded in ``cur_scale``. Defaults to 1.
        """
        self.cur_scale = scale
class DynamicLossScaler:
    """Legacy container for dynamic loss-scaler state.

    Only the constructor is defined here: it records the configuration
    it is given plus a few fixed initial counters.  No update or
    overflow-detection logic is part of this class.  It lives in
    ``megatron.fp16_deprecated`` so that checkpoints pickled against the
    old ``fp16.loss_scaler`` / ``megatron.fp16.loss_scaler`` module
    paths can still be deserialized.

    NOTE(review): keep the class name and attribute names unchanged --
    pickled objects refer to them by name.
    """

    def __init__(
        self,
        init_scale=2 ** 32,
        scale_factor=2.0,
        scale_window=1000,
        min_scale=1,
        delayed_shift=1,
        consecutive_hysteresis=False,
    ):
        """Record the scaler configuration and reset its counters.

        Args:
            init_scale: Stored as the starting ``cur_scale``.
            scale_factor: Stored unchanged in ``scale_factor``.
            scale_window: Stored unchanged in ``scale_window``.
            min_scale: Stored unchanged in ``min_scale``.
            delayed_shift: Stored in ``delayed_shift`` and also used as
                the initial value of ``cur_hysteresis``.
            consecutive_hysteresis: Stored unchanged in
                ``consecutive_hysteresis``.
        """
        # Running state: starting scale, iteration counter at zero, and
        # -1 as the initial "last overflow" iteration.
        self.cur_scale = init_scale
        self.cur_iter = 0
        self.last_overflow_iter = -1

        # Configuration, copied verbatim from the arguments.
        self.scale_factor = scale_factor
        self.scale_window = scale_window
        self.min_scale = min_scale
        self.delayed_shift = delayed_shift

        # Hysteresis counter starts out equal to ``delayed_shift``.
        self.cur_hysteresis = delayed_shift
        self.consecutive_hysteresis = consecutive_hysteresis
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment