Commit 9ee197d0 authored by アマデウス, committed by Frank Lee

moved env variables to global variables (#215)

added branch context;
added vocab parallel layers;
moved split_batch from load_batch to the tensor parallel embedding layers;
updated the GPT model;
updated unit test cases;
fixed a few bugs in the collective communicators
parent b82d60be
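The headline change is the set of vocab parallel layers: instead of replicating the word embedding table on every tensor-parallel rank, each rank owns a contiguous slice of the vocabulary rows, looks up only the token ids it owns, and the partial results are summed with an all-reduce. As a rough illustration only (the class name `VocabParallelEmbeddingSketch` and the bare `torch.distributed` setup below are made up for this sketch and do not mirror ColossalAI's actual implementation, which additionally handles the batch splitting mentioned in the commit message):

```python
# Minimal sketch of a vocab-parallel embedding, assuming torch.distributed
# is already initialized and the vocabulary divides evenly across ranks.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist


class VocabParallelEmbeddingSketch(nn.Module):
    """Each rank stores a contiguous slice of the vocabulary rows."""

    def __init__(self, num_embeddings, embedding_dim, tp_group=None):
        super().__init__()
        self.tp_group = tp_group
        rank = dist.get_rank(group=tp_group)
        world_size = dist.get_world_size(group=tp_group)
        assert num_embeddings % world_size == 0
        self.part = num_embeddings // world_size
        self.start = rank * self.part        # first vocab id owned here
        self.end = self.start + self.part    # one past the last owned id
        self.weight = nn.Parameter(torch.empty(self.part, embedding_dim))
        nn.init.normal_(self.weight)

    def forward(self, tokens):
        # Mask ids that live on other ranks, look up the local slice,
        # zero the masked rows, then all-reduce so every rank ends up
        # with the full embedding output.
        mask = (tokens < self.start) | (tokens >= self.end)
        local_ids = (tokens - self.start).clamp(0, self.part - 1)
        out = F.embedding(local_ids, self.weight)
        out[mask] = 0.0
        dist.all_reduce(out, group=self.tp_group)
        return out


# Usage (inside an initialized process group), reusing the test constants:
#   embed = VocabParallelEmbeddingSketch(VOCAB_SIZE, HIDDEN_SIZE).cuda()
#   out = embed(token_ids)  # identical on every rank after the all-reduce
```

The masked-lookup-plus-all-reduce pattern is the same one Megatron-LM uses for its vocab-parallel embedding; it keeps the communication to a single all-reduce per lookup.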
@@ -10,6 +10,7 @@ HIDDEN_SIZE = 8
 NUM_CLASSES = 8
 NUM_BLOCKS = 2
 IMG_SIZE = 16
+VOCAB_SIZE = 16

 def check_equal(A, B):
     eq = torch.allclose(A, B, rtol=1e-3, atol=1e-2)
...
@@ -7,9 +7,14 @@ import torch
 import torch.multiprocessing as mp
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.logging import disable_existing_loggers
 from colossalai.utils import free_port
-from checks_3d.check_layer_3d import *
+from checks_3d.check_layer_3d import (check_classifier_given_embed_weight, check_classifier_no_given_weight,
+                                      check_embed, check_layernorm, check_linear, check_loss, check_patch_embed,
+                                      check_vocab_parallel_classifier_given_embed_weight,
+                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
+                                      check_vocab_parallel_loss)


 CONFIG = dict(
     parallel=dict(
@@ -23,13 +28,23 @@ CONFIG = dict(
 def check_layer():
     check_linear()
     check_layernorm()
-    check_classifier()
-    # check_embed()
-    # check_loss()
+    check_classifier_no_given_weight()
+    check_vocab_parallel_classifier_no_given_weight()
+    check_classifier_given_embed_weight()
+    check_vocab_parallel_classifier_given_embed_weight()
+    check_embed()
+    check_patch_embed()
+    check_vocab_parallel_embed()
+    check_loss()
+    check_vocab_parallel_loss()


 def check_layer_and_operation(rank, world_size, port):
+    disable_existing_loggers()
     launch(config=CONFIG, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+    torch.backends.cuda.matmul.allow_tf32 = False
+    torch.backends.cudnn.allow_tf32 = False
+    torch.backends.cudnn.deterministic = True
     check_layer()
     gpc.destroy()
     torch.cuda.empty_cache()
...
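A note on the three `torch.backends` lines added before `check_layer()`: on Ampere and newer GPUs, matmuls default to TF32, which trades mantissa precision for speed, and cuDNN may pick non-deterministic algorithms; either can make the `torch.allclose` comparisons in `check_equal` (rtol=1e-3, atol=1e-2) flake between runs. If the same setup were needed in other tests, it could be wrapped in a context manager; the sketch below is a hypothetical helper (the name `deterministic_backends` is not part of the codebase):

```python
# Sketch of a reusable helper that mirrors what the test now does inline:
# force full-precision, deterministic kernels, then restore the old flags.
import contextlib
import torch


@contextlib.contextmanager
def deterministic_backends():
    old = (torch.backends.cuda.matmul.allow_tf32,
           torch.backends.cudnn.allow_tf32,
           torch.backends.cudnn.deterministic)
    torch.backends.cuda.matmul.allow_tf32 = False  # fp32 matmuls, no TF32
    torch.backends.cudnn.allow_tf32 = False        # fp32 cuDNN kernels
    torch.backends.cudnn.deterministic = True      # reproducible algorithms
    try:
        yield
    finally:
        (torch.backends.cuda.matmul.allow_tf32,
         torch.backends.cudnn.allow_tf32,
         torch.backends.cudnn.deterministic) = old


# Usage:
#   with deterministic_backends():
#       check_layer()
```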