Commit 9ee197d0 authored by アマデウス, committed by Frank Lee

moved env variables to global variables (#215)

added branch context;
added vocab parallel layers;
moved split_batch from load_batch to the tensor parallel embedding layers;
updated the GPT model;
updated unit test cases;
fixed a few bugs in the collective communicators
parent b82d60be
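The headline change is the set of vocab parallel layers: instead of replicating the word embedding table on every tensor-parallel rank, each rank owns a contiguous slice of the vocabulary rows, looks up only the token ids it owns, and the partial results are summed with an all-reduce. As a rough illustration only (the class name `VocabParallelEmbeddingSketch` and the bare `torch.distributed` setup below are made up for this sketch and do not mirror ColossalAI's actual implementation, which additionally handles the batch splitting mentioned in the commit message):

```python
# Minimal sketch of a vocab-parallel embedding, assuming torch.distributed
# is already initialized and the vocabulary divides evenly across ranks.
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist


class VocabParallelEmbeddingSketch(nn.Module):
    """Each rank stores a contiguous slice of the vocabulary rows."""

    def __init__(self, num_embeddings, embedding_dim, tp_group=None):
        super().__init__()
        self.tp_group = tp_group
        rank = dist.get_rank(group=tp_group)
        world_size = dist.get_world_size(group=tp_group)
        assert num_embeddings % world_size == 0
        self.part = num_embeddings // world_size
        self.start = rank * self.part        # first vocab id owned here
        self.end = self.start + self.part    # one past the last owned id
        self.weight = nn.Parameter(torch.empty(self.part, embedding_dim))
        nn.init.normal_(self.weight)

    def forward(self, tokens):
        # Mask ids that live on other ranks, look up the local slice,
        # zero the masked rows, then all-reduce so every rank ends up
        # with the full embedding output.
        mask = (tokens < self.start) | (tokens >= self.end)
        local_ids = (tokens - self.start).clamp(0, self.part - 1)
        out = F.embedding(local_ids, self.weight)
        out[mask] = 0.0
        dist.all_reduce(out, group=self.tp_group)
        return out


# Usage (inside an initialized process group), reusing the test constants:
#   embed = VocabParallelEmbeddingSketch(VOCAB_SIZE, HIDDEN_SIZE).cuda()
#   out = embed(token_ids)  # identical on every rank after the all-reduce
```

The masked-lookup-plus-all-reduce pattern is the same one Megatron-LM uses for its vocab-parallel embedding; it keeps the communication to a single all-reduce per lookup.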
@@ -10,6 +10,7 @@ HIDDEN_SIZE = 8
 NUM_CLASSES = 8
 NUM_BLOCKS = 2
 IMG_SIZE = 16
+VOCAB_SIZE = 16

 def check_equal(A, B):
     eq = torch.allclose(A, B, rtol=1e-3, atol=1e-2)
...
@@ -7,9 +7,14 @@ import torch
 import torch.multiprocessing as mp
 from colossalai.core import global_context as gpc
 from colossalai.initialize import launch
+from colossalai.logging import disable_existing_loggers
 from colossalai.utils import free_port
-from checks_3d.check_layer_3d import *
+from checks_3d.check_layer_3d import (check_classifier_given_embed_weight, check_classifier_no_given_weight,
+                                      check_embed, check_layernorm, check_linear, check_loss, check_patch_embed,
+                                      check_vocab_parallel_classifier_given_embed_weight,
+                                      check_vocab_parallel_classifier_no_given_weight, check_vocab_parallel_embed,
+                                      check_vocab_parallel_loss)


 CONFIG = dict(
     parallel=dict(
@@ -23,13 +28,23 @@ CONFIG = dict(
 def check_layer():
     check_linear()
     check_layernorm()
-    check_classifier()
-    # check_embed()
-    # check_loss()
+    check_classifier_no_given_weight()
+    check_vocab_parallel_classifier_no_given_weight()
+    check_classifier_given_embed_weight()
+    check_vocab_parallel_classifier_given_embed_weight()
+    check_embed()
+    check_patch_embed()
+    check_vocab_parallel_embed()
+    check_loss()
+    check_vocab_parallel_loss()


 def check_layer_and_operation(rank, world_size, port):
+    disable_existing_loggers()
     launch(config=CONFIG, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')
+    torch.backends.cuda.matmul.allow_tf32 = False
+    torch.backends.cudnn.allow_tf32 = False
+    torch.backends.cudnn.deterministic = True
     check_layer()
     gpc.destroy()
     torch.cuda.empty_cache()
...
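A note on the three `torch.backends` lines added before `check_layer()`: on Ampere and newer GPUs, matmuls default to TF32, which trades mantissa precision for speed, and cuDNN may pick non-deterministic algorithms; either can make the `torch.allclose` comparisons in `check_equal` (rtol=1e-3, atol=1e-2) flake between runs. If the same setup were needed in other tests, it could be wrapped in a context manager; the sketch below is a hypothetical helper (the name `deterministic_backends` is not part of the codebase):

```python
# Sketch of a reusable helper that mirrors what the test now does inline:
# force full-precision, deterministic kernels, then restore the old flags.
import contextlib
import torch


@contextlib.contextmanager
def deterministic_backends():
    old = (torch.backends.cuda.matmul.allow_tf32,
           torch.backends.cudnn.allow_tf32,
           torch.backends.cudnn.deterministic)
    torch.backends.cuda.matmul.allow_tf32 = False  # fp32 matmuls, no TF32
    torch.backends.cudnn.allow_tf32 = False        # fp32 cuDNN kernels
    torch.backends.cudnn.deterministic = True      # reproducible algorithms
    try:
        yield
    finally:
        (torch.backends.cuda.matmul.allow_tf32,
         torch.backends.cudnn.allow_tf32,
         torch.backends.cudnn.deterministic) = old


# Usage:
#   with deterministic_backends():
#       check_layer()
```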