OpenDAS / Megatron-LM

Commit b886b7bb, authored Oct 08, 2019 by Mohammad Shoeybi

created megatron package

Parent: 4947002d
Changes: 45. Showing 20 changed files with 30 additions and 30 deletions.
configure_data.py                              +2   -2
evaluate_gpt2.py                               +14  -14
generate_samples.py                            +8   -8
gpt2_data_loader.py                            +3   -3
megatron/data_utils/__init__.py                +0   -0
megatron/data_utils/corpora.py                 +0   -0
megatron/data_utils/datasets.py                +0   -0
megatron/data_utils/file_utils.py              +0   -0
megatron/data_utils/lazy_loader.py             +0   -0
megatron/data_utils/samplers.py                +0   -0
megatron/data_utils/tf_dl.py                   +0   -0
megatron/data_utils/tokenization.py            +0   -0
megatron/data_utils/tokenization_gpt2.py       +0   -0
megatron/data_utils/wordpiece.py               +0   -0
megatron/fp16/__init__.py                      +0   -0
megatron/fp16/fp16.py                          +0   -0
megatron/fp16/fp16util.py                      +1   -1
megatron/fp16/loss_scaler.py                   +1   -1
megatron/learning_rates.py                     +1   -1
megatron/model/__init__.py                     +0   -0
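Every diff on this page applies the same refactor: modules that previously sat at the repository root (data_utils, fp16, learning_rates, model, and, judging by the rewritten imports, mpu and utils as well) now live under a new megatron package, and each flat import is rewritten to its package-qualified form. A minimal sketch of the before/after pattern, assuming the repository root (the directory that contains megatron/) is on sys.path; the checkout path below is hypothetical:

import sys

# Hypothetical checkout location of Megatron-LM; adjust to your clone.
sys.path.insert(0, "/path/to/Megatron-LM")

# Before this commit (flat, top-level modules):
#   import mpu
#   from utils import Timers
#   from data_utils import make_tokenizer

# After this commit (package-qualified imports, as in the hunks below):
from megatron import mpu
from megatron.utils import Timers
from megatron.data_utils import make_tokenizer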
configure_data.py
@@ -17,9 +17,9 @@
 import copy
 import torch
-import data_utils
-import mpu
+from megatron import data_utils
+from megatron import mpu
 class DataConfig:
evaluate_gpt2.py
@@ -25,21 +25,21 @@ import torch
 from arguments import get_args
 from configure_data import configure_data
-from fp16 import FP16_Module
-from fp16 import FP16_Optimizer
-from learning_rates import AnnealingLR
-from model import GPT2Model
-from model import gpt2_get_params_for_weight_decay_optimization
-from model import DistributedDataParallel as DDP
-import mpu
+from megatron.fp16 import FP16_Module
+from megatron.fp16 import FP16_Optimizer
+from megatron.learning_rates import AnnealingLR
+from megatron.model import GPT2Model
+from megatron.model import gpt2_get_params_for_weight_decay_optimization
+from megatron.model import DistributedDataParallel as DDP
+from megatron import mpu
 from apex.optimizers import FusedAdam as Adam
-from utils import Timers
-from utils import load_checkpoint
-from utils import report_memory
-from utils import print_params_min_max_norm
-from utils import print_rank_0
-from data_utils import make_tokenizer
+from megatron.utils import Timers
+from megatron.utils import load_checkpoint
+from megatron.utils import report_memory
+from megatron.utils import print_params_min_max_norm
+from megatron.utils import print_rank_0
+from megatron.data_utils import make_tokenizer
 from detokenizer import *
@@ -539,7 +539,7 @@ def main():
         model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
     else:
         if args.load_openai:
-            from utils import move_weights
+            from megatron.utils import move_weights
             model_path = args.load
             args.load = None
             model = setup_model(args)
generate_samples.py
@@ -25,20 +25,20 @@ import torch.nn.functional as F
 import argparse
 import time
 from arguments import get_args
-from utils import Timers
+from megatron.utils import Timers
 from pretrain_gpt2 import initialize_distributed
 from pretrain_gpt2 import set_random_seed
 from pretrain_gpt2 import get_train_val_test_data
 from pretrain_gpt2 import get_masks_and_position_ids
-from utils import load_checkpoint
-from data_utils import make_tokenizer
+from megatron.utils import load_checkpoint
+from megatron.data_utils import make_tokenizer
 from configure_data import configure_data
-import mpu
-from fp16 import FP16_Module
-from model import GPT2Model
-from model import DistributedDataParallel as DDP
-from utils import print_rank_0
+from megatron import mpu
+from megatron.fp16 import FP16_Module
+from megatron.model import GPT2Model
+from megatron.model import DistributedDataParallel as DDP
+from megatron.utils import print_rank_0
 def get_model(args):
     """Build the model."""
gpt2_data_loader.py
@@ -21,9 +21,9 @@ import torch
 from torch.multiprocessing import Lock
 from torch.utils.data import Dataset
-import mpu
-from data_utils.samplers import DistributedBatchSampler
-from data_utils.tokenization_gpt2 import GPT2Tokenizer
+from megatron import mpu
+from megatron.data_utils.samplers import DistributedBatchSampler
+from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
 def make_gpt2_dataloaders(args):
data_utils/__init__.py → megatron/data_utils/__init__.py (File moved)
data_utils/corpora.py → megatron/data_utils/corpora.py (File moved)
data_utils/datasets.py → megatron/data_utils/datasets.py (File moved)
data_utils/file_utils.py → megatron/data_utils/file_utils.py (File moved)
data_utils/lazy_loader.py → megatron/data_utils/lazy_loader.py (File moved)
data_utils/samplers.py → megatron/data_utils/samplers.py (File moved)
data_utils/tf_dl.py → megatron/data_utils/tf_dl.py (File moved)
data_utils/tokenization.py → megatron/data_utils/tokenization.py (File moved)
data_utils/tokenization_gpt2.py → megatron/data_utils/tokenization_gpt2.py (File moved)
data_utils/wordpiece.py → megatron/data_utils/wordpiece.py (File moved)
fp16/__init__.py → megatron/fp16/__init__.py (File moved)
fp16/fp16.py → megatron/fp16/fp16.py (File moved)
fp16/fp16util.py → megatron/fp16/fp16util.py
@@ -18,7 +18,7 @@ import torch.nn as nn
 from torch.autograd import Variable
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
-import mpu
+from megatron import mpu
 class tofp16(nn.Module):
fp16/loss_scaler.py → megatron/fp16/loss_scaler.py
@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-import mpu
+from megatron import mpu
 # item() is a recent addition, so this helps with backward compatibility.
 def to_python_float(t):
learning_rates.py → megatron/learning_rates.py
@@ -18,7 +18,7 @@ import torch
 from torch.optim.lr_scheduler import _LRScheduler
 import math
-from utils import print_rank_0
+from megatron.utils import print_rank_0
 class AnnealingLR(_LRScheduler):
model/__init__.py → megatron/model/__init__.py (File moved)
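Taken together, the "File moved" entries above give the shape of the new package for the files visible on this page: data_utils/ and fp16/ move wholesale to megatron/data_utils/ and megatron/fp16/, learning_rates.py becomes megatron/learning_rates.py, and model/__init__.py becomes megatron/model/__init__.py (the remaining moves, including mpu and utils, are presumably among the other files of this 45-file commit). A small sketch, under the same sys.path assumption as above, that enumerates what is importable under megatron after the move; pkgutil is standard library:

import pkgutil

# Assumes the repository root (the directory containing megatron/) is on sys.path.
import megatron

# List the sub-packages and modules now reachable through the megatron package,
# e.g. data_utils, fp16, model, learning_rates (plus mpu/utils from the rest of the commit).
for info in pkgutil.iter_modules(megatron.__path__):
    kind = "package" if info.ispkg else "module"
    print(f"megatron.{info.name} ({kind})")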