"vscode:/vscode.git/clone" did not exist on "73eb3e88624160cc293b60b13caa4587553efc50"
Unverified Commit fae6c92e authored by Hongxin Liu's avatar Hongxin Liu Committed by GitHub
Browse files

Merge branch 'main' into feature/shardformer

parents bd186784 ac178ca5
#!/usr/bin/env python #!/usr/bin/env python
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import torch import torch
from colossalai.logging import get_dist_logger
from colossalai.registry import HOOKS from colossalai.legacy.registry import HOOKS
from colossalai.trainer.hooks import BaseHook from colossalai.legacy.trainer.hooks import BaseHook
from colossalai.logging import get_dist_logger
from colossalai.utils.checkpointing import save_checkpoint from colossalai.utils.checkpointing import save_checkpoint
from ._lr_scheduler_hook import LRSchedulerHook from ._lr_scheduler_hook import LRSchedulerHook
......
...@@ -3,17 +3,17 @@ ...@@ -3,17 +3,17 @@
import os import os
import os.path as osp import os.path as osp
from typing import List from typing import List
from colossalai.context import ParallelMode from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.registry import HOOKS from colossalai.legacy.registry import HOOKS
from colossalai.legacy.trainer.hooks._metric_hook import ThroughputMetric
from colossalai.logging import DistributedLogger from colossalai.logging import DistributedLogger
from colossalai.utils import report_memory_usage, is_dp_rank_0, \ from colossalai.utils import MultiTimer, is_dp_rank_0, is_no_pp_or_last_stage, is_tp_rank_0, report_memory_usage
is_tp_rank_0, is_no_pp_or_last_stage, MultiTimer
from ._base_hook import BaseHook from ._base_hook import BaseHook
from ._commons_ import _format_number from ._commons_ import _format_number
from colossalai.trainer.hooks._metric_hook import ThroughputMetric
class LogByEpochHook(BaseHook): class LogByEpochHook(BaseHook):
......
from colossalai.registry import HOOKS
from torch import Tensor from torch import Tensor
from colossalai.legacy.registry import HOOKS
from ._metric_hook import LearningRateMetric, MetricHook from ._metric_hook import LearningRateMetric, MetricHook
......
...@@ -6,10 +6,11 @@ from typing import Callable ...@@ -6,10 +6,11 @@ from typing import Callable
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from colossalai.communication import all_reduce from colossalai.communication import all_reduce
from colossalai.context import ParallelMode from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.registry import HOOKS from colossalai.legacy.registry import HOOKS
from colossalai.utils import get_current_device, is_no_pp_or_last_stage from colossalai.utils import get_current_device, is_no_pp_or_last_stage
from ._base_hook import BaseHook from ._base_hook import BaseHook
......
...@@ -15,8 +15,8 @@ from colossalai.context import ParallelMode, seed ...@@ -15,8 +15,8 @@ from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env from colossalai.global_variables import tensor_parallel_env as env
from colossalai.kernel import LayerNorm from colossalai.kernel import LayerNorm
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import ( from colossalai.utils.checkpointing import (
broadcast_state_dict, broadcast_state_dict,
gather_tensor_parallel_state_dict, gather_tensor_parallel_state_dict,
......
...@@ -5,21 +5,30 @@ from typing import Callable ...@@ -5,21 +5,30 @@ from typing import Callable
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter
from colossalai.communication import broadcast from colossalai.communication import broadcast
from colossalai.context import ParallelMode, seed from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict from colossalai.utils.checkpointing import gather_tensor_parallel_state_dict, partition_tensor_parallel_state_dict
from colossalai.utils.cuda import get_current_device from colossalai.utils.cuda import get_current_device
from torch import Tensor
from torch.nn import Parameter
from ..base_layer import ParallelLayer from ..base_layer import ParallelLayer
from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple
from ._operation import (Matmul_AB_2D, Matmul_ABT_2D, add_bias_2d, all_gather_tensor_2d, classifier_2d, layernorm_2d, from ._operation import (
reduce_scatter_tensor_2d, split_batch_2d) Matmul_AB_2D,
Matmul_ABT_2D,
add_bias_2d,
all_gather_tensor_2d,
classifier_2d,
layernorm_2d,
reduce_scatter_tensor_2d,
split_batch_2d,
)
from ._utils import assert_summa_initialization, get_summa_dim_from_env from ._utils import assert_summa_initialization, get_summa_dim_from_env
......
...@@ -5,22 +5,34 @@ from typing import Callable ...@@ -5,22 +5,34 @@ from typing import Callable
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import Tensor
from torch.nn import Parameter
from colossalai.communication import broadcast from colossalai.communication import broadcast
from colossalai.context import ParallelMode, seed from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init from colossalai.nn import init as init
from colossalai.registry import LAYERS from colossalai.utils.checkpointing import (
from colossalai.utils.checkpointing import (broadcast_state_dict, gather_tensor_parallel_state_dict, broadcast_state_dict,
partition_tensor_parallel_state_dict) gather_tensor_parallel_state_dict,
partition_tensor_parallel_state_dict,
)
from colossalai.utils.cuda import get_current_device from colossalai.utils.cuda import get_current_device
from torch import Tensor
from torch.nn import Parameter
from ..base_layer import ParallelLayer from ..base_layer import ParallelLayer
from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple from ..utils import divide, set_tensor_parallel_attribute_by_partition, to_2tuple
from ._operation import (Matmul_AB_2p5D, Matmul_ABT_2p5D, add_bias_2p5d, all_gather_tensor_2p5d, classifier_2p5d, from ._operation import (
layernorm_2p5d, reduce_scatter_tensor_2p5d, split_batch_2p5d) Matmul_AB_2p5D,
Matmul_ABT_2p5D,
add_bias_2p5d,
all_gather_tensor_2p5d,
classifier_2p5d,
layernorm_2p5d,
reduce_scatter_tensor_2p5d,
split_batch_2p5d,
)
from ._utils import assert_tesseract_initialization, get_tesseract_dim_dep_from_env from ._utils import assert_tesseract_initialization, get_tesseract_dim_dep_from_env
......
...@@ -13,9 +13,9 @@ from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP ...@@ -13,9 +13,9 @@ from colossalai.constants import INPUT_GROUP_3D, INPUT_X_WEIGHT_3D, OUTPUT_GROUP
from colossalai.context import ParallelMode, seed from colossalai.context import ParallelMode, seed
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.global_variables import tensor_parallel_env as env from colossalai.global_variables import tensor_parallel_env as env
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init from colossalai.nn import init as init
from colossalai.nn.layer.base_layer import ParallelLayer from colossalai.nn.layer.base_layer import ParallelLayer
from colossalai.registry import LAYERS
from colossalai.utils.checkpointing import ( from colossalai.utils.checkpointing import (
broadcast_state_dict, broadcast_state_dict,
gather_tensor_parallel_state_dict, gather_tensor_parallel_state_dict,
......
...@@ -2,20 +2,20 @@ ...@@ -2,20 +2,20 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import math import math
import colossalai
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.nn import Parameter from torch.nn import Parameter
import colossalai
from colossalai.context import seed
from colossalai.context.parallel_mode import ParallelMode from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.nn.layer.parallel_sequence._operation import RingQK, RingAV
from colossalai.registry import LAYERS
from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
from colossalai.kernel import FusedScaleMaskSoftmax from colossalai.kernel import FusedScaleMaskSoftmax
from colossalai.context import seed from colossalai.kernel.cuda_native.scaled_softmax import AttnMaskType
from colossalai.legacy.registry import LAYERS
from colossalai.nn.layer.parallel_sequence._operation import RingAV, RingQK
@LAYERS.register_module @LAYERS.register_module
......
...@@ -8,8 +8,8 @@ from torch import nn as nn ...@@ -8,8 +8,8 @@ from torch import nn as nn
from torch.nn.parameter import Parameter from torch.nn.parameter import Parameter
from colossalai.context import seed from colossalai.context import seed
from colossalai.legacy.registry import LAYERS
from colossalai.nn import init as init from colossalai.nn import init as init
from colossalai.registry import LAYERS
from colossalai.utils.cuda import get_current_device from colossalai.utils.cuda import get_current_device
from ..utils import to_2tuple from ..utils import to_2tuple
......
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.registry import LOSSES
from torch.cuda.amp import custom_bwd, custom_fwd from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.modules.loss import _Loss from torch.nn.modules.loss import _Loss
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.legacy.registry import LOSSES
class _VocabParallelCrossEntropy1D(torch.autograd.Function): class _VocabParallelCrossEntropy1D(torch.autograd.Function):
......
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
from colossalai.context import ParallelMode from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.legacy.registry import LOSSES
from colossalai.nn.layer.parallel_2d import reduce_by_batch_2d, split_batch_2d from colossalai.nn.layer.parallel_2d import reduce_by_batch_2d, split_batch_2d
from colossalai.nn.layer.parallel_2d._utils import assert_summa_initialization from colossalai.nn.layer.parallel_2d._utils import assert_summa_initialization
from colossalai.registry import LOSSES
from colossalai.utils import get_current_device from colossalai.utils import get_current_device
from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
@LOSSES.register_module @LOSSES.register_module
......
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
from colossalai.context import ParallelMode from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.legacy.registry import LOSSES
from colossalai.nn.layer.parallel_2p5d import reduce_by_batch_2p5d, split_batch_2p5d from colossalai.nn.layer.parallel_2p5d import reduce_by_batch_2p5d, split_batch_2p5d
from colossalai.nn.layer.parallel_2p5d._utils import assert_tesseract_initialization from colossalai.nn.layer.parallel_2p5d._utils import assert_tesseract_initialization
from colossalai.registry import LOSSES
from colossalai.utils import get_current_device from colossalai.utils import get_current_device
from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
@LOSSES.register_module @LOSSES.register_module
......
import torch import torch
import torch.distributed as dist import torch.distributed as dist
from colossalai.constants import INPUT_GROUP_3D, WEIGHT_GROUP_3D, OUTPUT_GROUP_3D from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
from colossalai.constants import INPUT_GROUP_3D, OUTPUT_GROUP_3D, WEIGHT_GROUP_3D
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.legacy.registry import LOSSES
from colossalai.nn.layer.parallel_3d import reduce_by_batch_3d, split_tensor_3d from colossalai.nn.layer.parallel_3d import reduce_by_batch_3d, split_tensor_3d
from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env from colossalai.nn.layer.parallel_3d._utils import get_parallel_mode_from_env
from colossalai.registry import LOSSES
from colossalai.utils import get_current_device from colossalai.utils import get_current_device
from torch.cuda.amp import custom_bwd, custom_fwd
from torch.nn.functional import cross_entropy
from torch.nn.modules.loss import _Loss
@LOSSES.register_module @LOSSES.register_module
......
import torch.nn as nn import torch.nn as nn
from colossalai.registry import LOSSES
from torch.nn.modules.loss import _Loss from torch.nn.modules.loss import _Loss
from colossalai.context.moe_context import MOE_CONTEXT from colossalai.context.moe_context import MOE_CONTEXT
from colossalai.legacy.registry import LOSSES
@LOSSES.register_module @LOSSES.register_module
......
from torch.optim.lr_scheduler import CosineAnnealingLR as _CosineAnnealingLR from torch.optim.lr_scheduler import CosineAnnealingLR as _CosineAnnealingLR
from colossalai.registry import LR_SCHEDULERS from colossalai.legacy.registry import LR_SCHEDULERS
from .delayed import DelayerScheduler, WarmupDelayerScheduler, WarmupScheduler from .delayed import DelayerScheduler, WarmupDelayerScheduler, WarmupScheduler
......
from torch.optim.lr_scheduler import _LRScheduler from torch.optim.lr_scheduler import _LRScheduler
from colossalai.registry import LR_SCHEDULERS from colossalai.legacy.registry import LR_SCHEDULERS
@LR_SCHEDULERS.register_module @LR_SCHEDULERS.register_module
......
...@@ -2,7 +2,8 @@ from typing import List ...@@ -2,7 +2,8 @@ from typing import List
from torch.optim.lr_scheduler import MultiStepLR as _MultiStepLR from torch.optim.lr_scheduler import MultiStepLR as _MultiStepLR
from colossalai.registry import LR_SCHEDULERS from colossalai.legacy.registry import LR_SCHEDULERS
from .delayed import WarmupScheduler from .delayed import WarmupScheduler
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment