Unverified Commit 554aa959 authored by Hongxin Liu, committed by GitHub

[legacy] move communication and nn to legacy and refactor logger (#4671)

* [legacy] move communication to legacy (#4640)

* [legacy] refactor logger and clean up legacy codes (#4654)

* [legacy] make logger independent of gpc

* [legacy] make optim independent of registry

* [legacy] move test engine to legacy

* [legacy] move nn to legacy (#4656)

* [legacy] move nn to legacy

* [checkpointio] fix save hf config

* [test] remove useless rpc pp test

* [legacy] fix nn init

* [example] skip tutorial hybrid parallel example

* [devops] test doc check

* [devops] test doc check
parent 536397cc
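The hunks below update call sites in the tensor-parallel layer tests: imports that previously resolved from colossalai.nn now resolve from colossalai.legacy.nn (and likewise for the colossalai.nn.layer.* submodules). A minimal before/after sketch of the rename, using only module paths visible in the diff; the layer chosen and any usage beyond the import are illustrative:

# Before this PR, tensor-parallel layers were imported from colossalai.nn:
# from colossalai.nn import Linear1D_Col

# After this PR, the same symbols live under the legacy namespace:
from colossalai.legacy.nn import Linear1D_Col

# The move is a drop-in path change: nothing beyond the import statement
# needs to be touched at call sites.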
@@ -5,7 +5,7 @@ from torch.nn import Parameter
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
 from colossalai.global_variables import tensor_parallel_env as env
-from colossalai.nn import (
+from colossalai.legacy.nn import (
     Classifier1D,
     Embedding1D,
     Linear1D_Col,
......
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

import torch

DEPTH = 4
BATCH_SIZE = 8
SEQ_LENGTH = 8
IMG_SIZE = 16
HIDDEN_SIZE = 8
NUM_CLASSES = 8
VOCAB_SIZE = 16


def check_equal(A, B):
    assert torch.allclose(A, B, rtol=1e-3, atol=1e-1)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-

import torch

DEPTH = 4
BATCH_SIZE = 8
SEQ_LENGTH = 8
IMG_SIZE = 16
HIDDEN_SIZE = 8
NUM_CLASSES = 8
VOCAB_SIZE = 16


def check_equal(A, B):
    assert torch.allclose(A, B, rtol=1e-3, atol=1e-1)
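Both helper modules pin down the toy problem sizes and a loose elementwise comparison; the layer tests shard a single-device "master" tensor with torch.chunk and compare each rank's output against its chunk. A single-process sketch of that calling pattern, with the rank index and shapes purely illustrative:

import torch

DEPTH = 4
BATCH_SIZE = 8
HIDDEN_SIZE = 8


def check_equal(A, B):
    assert torch.allclose(A, B, rtol=1e-3, atol=1e-1)


# Stand-in for the distributed tests: rank i would hold one chunk of the
# batch, and its output is checked against the same slice of the master.
out_master = torch.randn(BATCH_SIZE, HIDDEN_SIZE)
i = 0  # hypothetical rank index along the batch dimension
out_shard = torch.chunk(out_master, DEPTH, dim=0)[i]
check_equal(out_shard, out_master[:BATCH_SIZE // DEPTH])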
 import torch
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2D, CrossEntropyLoss2D, Embedding2D, LayerNorm2D, Linear2D, PatchEmbedding2D,
-                           VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2D,
-                           VocabParallelCrossEntropyLoss2D, VocabParallelEmbedding2D)
+from colossalai.legacy.nn import (
+    Classifier2D,
+    CrossEntropyLoss2D,
+    Embedding2D,
+    LayerNorm2D,
+    Linear2D,
+    PatchEmbedding2D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2D,
+    VocabParallelCrossEntropyLoss2D,
+    VocabParallelEmbedding2D,
+)
 from colossalai.utils import get_current_device, print_rank_0
-from .common import (BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal)
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal

 def check_linear():
@@ -336,7 +347,7 @@ def check_classifier_no_given_weight():
     layer.weight.data.copy_(W)
     # W.requires_grad = True

-    B_shape = (OUTPUT_SIZE, )
+    B_shape = (OUTPUT_SIZE,)
     B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
     torch.distributed.broadcast(B_master, src=0)
     # B = torch.chunk(B_master, DEPTH, dim=0)[j]
@@ -572,7 +583,7 @@ def check_loss():
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, DEPTH, dim=0)[i]
@@ -607,7 +618,7 @@ def check_vocab_parallel_loss():
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, DEPTH, dim=0)[i]
......
@@ -5,10 +5,10 @@ import torch
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
-from .common import check_equal, BATCH_SIZE, SEQ_LENGTH, HIDDEN_SIZE, DEPTH
+from colossalai.legacy.nn.layer.parallel_2d._operation import Matmul_AB_2D, Matmul_ABT_2D, Matmul_ATB_2D
+from colossalai.utils import get_current_device, print_rank_0
+from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, SEQ_LENGTH, check_equal

 def check_AB():
......
 import torch
+from torch.nn import Parameter
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn import (Classifier2p5D, CrossEntropyLoss2p5D, Embedding2p5D, LayerNorm2p5D, Linear2p5D,
-                           PatchEmbedding2p5D, VanillaClassifier, VanillaPatchEmbedding, VocabParallelClassifier2p5D,
-                           VocabParallelCrossEntropyLoss2p5D, VocabParallelEmbedding2p5D)
+from colossalai.legacy.nn import (
+    Classifier2p5D,
+    CrossEntropyLoss2p5D,
+    Embedding2p5D,
+    LayerNorm2p5D,
+    Linear2p5D,
+    PatchEmbedding2p5D,
+    VanillaClassifier,
+    VanillaPatchEmbedding,
+    VocabParallelClassifier2p5D,
+    VocabParallelCrossEntropyLoss2p5D,
+    VocabParallelEmbedding2p5D,
+)
 from colossalai.utils import get_current_device, print_rank_0
-from torch.nn import Parameter
 from .common import *
@@ -342,7 +353,7 @@ def check_classifier_no_given_weight():
     layer.weight.data.copy_(W)
     # W.requires_grad = True

-    B_shape = (OUTPUT_SIZE, )
+    B_shape = (OUTPUT_SIZE,)
     B_master = torch.randint(5, B_shape, dtype=dtype, device=device)
     torch.distributed.broadcast(B_master, src=0)
     # B = torch.chunk(B_master, TESSERACT_DIM, dim=0)[j]
@@ -577,7 +588,7 @@ def check_loss():
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]
@@ -612,7 +623,7 @@ def check_vocab_parallel_loss():
     out_shape = (BATCH_SIZE, NUM_CLASSES)
     out_master = torch.randn(out_shape, dtype=dtype, device=device)
-    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE, ), dtype=torch.long, device=device)
+    target_master = torch.randint(NUM_CLASSES, (BATCH_SIZE,), dtype=torch.long, device=device)
     torch.distributed.broadcast(out_master, src=0)
     torch.distributed.broadcast(target_master, src=0)
     out = torch.chunk(out_master, TESSERACT_DIM, dim=0)[i]
......
@@ -2,10 +2,9 @@ import torch

 from colossalai.context import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, \
-    Matmul_ATB_2p5D
-from colossalai.utils import get_current_device
-from colossalai.utils import print_rank_0
+from colossalai.legacy.nn.layer.parallel_2p5d._operation import Matmul_AB_2p5D, Matmul_ABT_2p5D, Matmul_ATB_2p5D
+from colossalai.utils import get_current_device, print_rank_0
 from .common import *
......
@@ -11,4 +11,4 @@ IMG_SIZE = 16


 def check_equal(A, B):
-    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)
\ No newline at end of file
+    assert torch.allclose(A, B, rtol=1e-5, atol=1e-2)
@@ -7,8 +7,7 @@ import torch
 from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D
 from colossalai.core import global_context
-from colossalai.logging import get_dist_logger
-from colossalai.nn import (
+from colossalai.legacy.nn import (
     Classifier3D,
     CrossEntropyLoss3D,
     Embedding3D,
@@ -21,7 +20,8 @@ from colossalai.nn import (
     VocabParallelCrossEntropyLoss3D,
     VocabParallelEmbedding3D,
 )
-from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.legacy.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
+from colossalai.logging import get_dist_logger
 from colossalai.utils import get_current_device, print_rank_0
 from .common import BATCH_SIZE, DEPTH, HIDDEN_SIZE, IMG_SIZE, NUM_CLASSES, SEQ_LENGTH, VOCAB_SIZE, check_equal
......
@@ -16,4 +16,4 @@ VOCAB_SIZE = 16
 def check_equal(A, B):
     eq = torch.allclose(A, B, rtol=1e-3, atol=1e-2)
     assert eq, f"\nA = {A}\nB = {B}"
-    return eq
\ No newline at end of file
+    return eq
@@ -6,7 +6,7 @@ import pytest
 import torch

 import colossalai
-from colossalai.nn.parallel.layers import (
+from colossalai.legacy.nn.parallel.layers import (
     CachedEmbeddingBag,
     CachedParamMgr,
     EvictionStrategy,
......
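Beyond the path renames, the commit message also states the goals "make logger independent of gpc" and "make optim independent of registry". A minimal sketch of what the first goal implies for callers, assuming get_dist_logger keeps the zero-argument form visible in the diff and can now be called before the parallel context exists:

from colossalai.logging import get_dist_logger

# After the refactor, creating a logger should no longer require
# colossalai.core.global_context (gpc) to be initialized first, so
# plain scripts and unit tests can log without launching a parallel run.
logger = get_dist_logger()
logger.info("logger created without touching gpc")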