Commit b886b7bb authored by Mohammad Shoeybi

created megatron package

parent 4947002d
@@ -17,9 +17,9 @@
 import copy
 import torch
-import data_utils
-import mpu
+from megatron import data_utils
+from megatron import mpu
 class DataConfig:
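The rewritten imports assume that the former top-level modules now sit inside a megatron/ package directory. A minimal sketch of the layout these import paths imply, with the caveat that the directory listing itself is not shown in this diff:

# Assumed layout, inferred only from the new import paths in this commit:
#
#   megatron/
#       __init__.py        # marks "megatron" as a package
#       utils.py           # Timers, load_checkpoint, print_rank_0, ...
#       learning_rates.py  # AnnealingLR
#       data_utils/        # subpackage: samplers.py, tokenization_gpt2.py, ...
#       fp16/              # subpackage: FP16_Module, FP16_Optimizer
#       model/             # subpackage: GPT2Model, DistributedDataParallel
#       mpu/               # model-parallel utilities
#
# With the repository root on sys.path, the new absolute imports resolve:
from megatron import data_utils
from megatron import mpu
from megatron.utils import print_rank_0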
@@ -25,21 +25,21 @@ import torch
 from arguments import get_args
 from configure_data import configure_data
-from fp16 import FP16_Module
-from fp16 import FP16_Optimizer
-from learning_rates import AnnealingLR
-from model import GPT2Model
-from model import gpt2_get_params_for_weight_decay_optimization
-from model import DistributedDataParallel as DDP
-import mpu
+from megatron.fp16 import FP16_Module
+from megatron.fp16 import FP16_Optimizer
+from megatron.learning_rates import AnnealingLR
+from megatron.model import GPT2Model
+from megatron.model import gpt2_get_params_for_weight_decay_optimization
+from megatron.model import DistributedDataParallel as DDP
+from megatron import mpu
 from apex.optimizers import FusedAdam as Adam
-from utils import Timers
-from utils import load_checkpoint
-from utils import report_memory
-from utils import print_params_min_max_norm
-from utils import print_rank_0
+from megatron.utils import Timers
+from megatron.utils import load_checkpoint
+from megatron.utils import report_memory
+from megatron.utils import print_params_min_max_norm
+from megatron.utils import print_rank_0
-from data_utils import make_tokenizer
+from megatron.data_utils import make_tokenizer
 from detokenizer import *
@@ -539,7 +539,7 @@ def main():
         model = GPT2LMHeadModel.from_pretrained('gpt2', cache_dir='gpt2_weights').cuda()
     else:
         if args.load_openai:
-            from utils import move_weights
+            from megatron.utils import move_weights
             model_path = args.load
             args.load = None
             model = setup_model(args)
@@ -25,20 +25,20 @@ import torch.nn.functional as F
 import argparse
 import time
 from arguments import get_args
-from utils import Timers
+from megatron.utils import Timers
 from pretrain_gpt2 import initialize_distributed
 from pretrain_gpt2 import set_random_seed
 from pretrain_gpt2 import get_train_val_test_data
 from pretrain_gpt2 import get_masks_and_position_ids
-from utils import load_checkpoint
-from data_utils import make_tokenizer
+from megatron.utils import load_checkpoint
+from megatron.data_utils import make_tokenizer
 from configure_data import configure_data
-import mpu
+from megatron import mpu
-from fp16 import FP16_Module
-from model import GPT2Model
-from model import DistributedDataParallel as DDP
-from utils import print_rank_0
+from megatron.fp16 import FP16_Module
+from megatron.model import GPT2Model
+from megatron.model import DistributedDataParallel as DDP
+from megatron.utils import print_rank_0
 def get_model(args):
     """Build the model."""
@@ -21,9 +21,9 @@ import torch
 from torch.multiprocessing import Lock
 from torch.utils.data import Dataset
-import mpu
-from data_utils.samplers import DistributedBatchSampler
-from data_utils.tokenization_gpt2 import GPT2Tokenizer
+from megatron import mpu
+from megatron.data_utils.samplers import DistributedBatchSampler
+from megatron.data_utils.tokenization_gpt2 import GPT2Tokenizer
 def make_gpt2_dataloaders(args):
@@ -18,7 +18,7 @@ import torch.nn as nn
 from torch.autograd import Variable
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
-import mpu
+from megatron import mpu
 class tofp16(nn.Module):
@@ -14,7 +14,7 @@
 # limitations under the License.
 import torch
-import mpu
+from megatron import mpu
 # item() is a recent addition, so this helps with backward compatibility.
 def to_python_float(t):
@@ -18,7 +18,7 @@ import torch
 from torch.optim.lr_scheduler import _LRScheduler
 import math
-from utils import print_rank_0
+from megatron.utils import print_rank_0
 class AnnealingLR(_LRScheduler):
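Because modules inside the package (fp16, learning_rates, the data loaders) also switch to the absolute from megatron import ... form rather than relative imports, the repository root must be on sys.path, for example by launching the scripts from the repo root or by installing the package. A hypothetical packaging sketch, not part of this commit, that would make the new imports resolve from any working directory:

# Hypothetical setup.py sketch; the commit itself does not add one.
from setuptools import setup, find_packages

setup(
    name='megatron',
    version='0.1',  # placeholder version
    packages=find_packages(include=['megatron', 'megatron.*']),
)

After an editable install (pip install -e . from the repository root), from megatron import mpu works regardless of the current directory.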