"docs/source/vscode:/vscode.git/clone" did not exist on "8db2dd335558f896880cbe268ab8970311e11ddb"
Commit b886b7bb authored by Mohammad Shoeybi

created megatron package

parent 4947002d
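
The diff below moves the formerly top-level modules (mpu, fp16, learning_rates, model, utils, data_utils) under a megatron package, so every absolute import gains the megatron. prefix. A minimal sketch of the resulting import pattern, assuming the megatron package is importable (installed or on PYTHONPATH); the specific names shown are taken from the diff, but this snippet itself is illustrative and not part of the commit:

# Before this commit: modules were imported as flat, top-level names.
# import mpu
# from utils import print_rank_0

# After this commit: the same modules live inside the megatron package.
from megatron import mpu
from megatron.utils import print_rank_0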
@@ -17,9 +17,9 @@
 import copy
 import torch
-import data_utils
-import mpu
+from megatron import data_utils
+from megatron import mpu
 class DataConfig:
@@ -25,21 +25,21 @@ import torch
 from arguments import get_args
 from configure_data import configure_data
-from fp16 import FP16_Module
-from fp16 import FP16_Optimizer
-from learning_rates import AnnealingLR
-from model import GPT2Model
-from model import gpt2_get_params_for_weight_decay_optimization
-from model import DistributedDataParallel as DDP
-import mpu
+from megatron.fp16 import FP16_Module
+from megatron.fp16 import FP16_Optimizer
+from megatron.learning_rates import AnnealingLR
+from megatron.model import GPT2Model
+from megatron.model import gpt2_get_params_for_weight_decay_optimization
+from megatron.model import DistributedDataParallel as DDP
+from megatron import mpu
 from apex.optimizers import FusedAdam as Adam
-from utils import Timers
-from utils import load_checkpoint
-from utils import report_memory
-from utils import print_params_min_max_norm
-from utils import print_rank_0
-from data_utils import make_tokenizer
+from megatron.utils import Timers
+from megatron.utils import load_checkpoint
+from megatron.utils import report_memory
+from megatron.utils import print_params_min_max_norm
+from megatron.utils import print_rank_0
+from megatron.data_utils import make_tokenizer
 from detokenizer import *
@@ -539,7 +539,7 @@ def main():
         model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
     else:
         if args.load_openai:
-            from utils import move_weights
+            from megatron.utils import move_weights
             model_path = args.load
             args.load = None
             model = setup_model(args)
@@ -25,20 +25,20 @@ import torch.nn.functional as F
 import argparse
 import time
 from arguments import get_args
-from utils import Timers
+from megatron.utils import Timers
 from pretrain_gpt2 import initialize_distributed
 from pretrain_gpt2 import set_random_seed
 from pretrain_gpt2 import get_train_val_test_data
 from pretrain_gpt2 import get_masks_and_position_ids
-from utils import load_checkpoint
-from data_utils import make_tokenizer
+from megatron.utils import load_checkpoint
+from megatron.data_utils import make_tokenizer
 from configure_data import configure_data
-import mpu
-from fp16 import FP16_Module
-from model import GPT2Model
-from model import DistributedDataParallel as DDP
-from utils import print_rank_0
+from megatron import mpu
+from megatron.fp16 import FP16_Module
+from megatron.model import GPT2Model
+from megatron.model import DistributedDataParallel as DDP
+from megatron.utils import print_rank_0
 def get_model(args):
     """Build the model."""
@@ -21,9 +21,9 @@ import torch
 from torch.multiprocessing import Lock
 from torch.utils.data import Dataset
-import mpu
-from data_utils.samplers import DistributedBatchSampler
-from data_utils.tokenization_gpt2 import GPT2Tokenizer
+from megatron import mpu
+from megatron.data_utils.samplers import DistributedBatchSampler
+from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
 def make_gpt2_dataloaders(args):
@@ -18,7 +18,7 @@ import torch.nn as nn
 from torch.autograd import Variable
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
-import mpu
+from megatron import mpu
 class tofp16(nn.Module):
@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-import mpu
+from megatron import mpu
 # item() is a recent addition, so this helps with backward compatibility.
 def to_python_float(t):
@@ -18,7 +18,7 @@ import torch
 from torch.optim.lr_scheduler import _LRScheduler
 import math
-from utils import print_rank_0
+from megatron.utils import print_rank_0
 class AnnealingLR(_LRScheduler):