Unverified Commit b5f9e37c authored by Hongxin Liu, committed by GitHub

[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)

* [legacy] remove cli of benchmark and update optim (#4690)

* [legacy] remove cli of benchmark and update optim

* [doc] fix cli doc test

* [legacy] fix engine clip grad norm

* [legacy] remove outdated colo tensor (#4694)

* [legacy] remove outdated colo tensor

* [test] fix test import

* [legacy] move outdated zero to legacy (#4696)

* [legacy] clean up utils (#4700)

* [legacy] clean up utils

* [example] update examples

* [legacy] clean up amp

* [legacy] fix amp module

* [legacy] clean up gpc (#4742)

* [legacy] clean up context

* [legacy] clean core, constants and global vars

* [legacy] refactor initialize

* [example] fix examples ci

* [example] fix examples ci

* [legacy] fix tests

* [example] fix gpt example

* [example] fix examples ci

* [devops] fix ci installation

* [example] fix examples ci
parent 32e7f994
@@ -3,10 +3,10 @@ from .initializer_2d import Initializer_2D
 from .initializer_2p5d import Initializer_2p5D
 from .initializer_3d import Initializer_3D
 from .initializer_data import Initializer_Data
+from .initializer_model import Initializer_Model
 from .initializer_pipeline import Initializer_Pipeline
 from .initializer_sequence import Initializer_Sequence
 from .initializer_tensor import Initializer_Tensor
-from .initializer_model import Initializer_Model
 from .process_group_initializer import ProcessGroupInitializer
 
 __all__ = [
......
@@ -3,7 +3,7 @@
 import torch.distributed as dist
 
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 
 from ..parallel_mode import ParallelMode
......
@@ -2,7 +2,7 @@ import math
 import torch.distributed as dist
 
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 
 from ..parallel_mode import ParallelMode
......
@@ -6,7 +6,7 @@ import math
 import torch.distributed as dist
 
 from colossalai.context import Config
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 
 from ..parallel_mode import ParallelMode
......
@@ -5,7 +5,7 @@ import math
 import torch.distributed as dist
 
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 
 from ..parallel_mode import ParallelMode
......
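For code outside this repository that still imports the tensor-parallel environment from the old location, the migration is a one-line import change (a minimal sketch; the commented-out line is the path removed by this PR):

```python
# Old import path, removed by this PR:
# from colossalai.global_variables import tensor_parallel_env as env

# New import path under the legacy namespace:
from colossalai.legacy.global_variables import tensor_parallel_env as env
```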
@@ -7,8 +7,8 @@ from contextlib import contextmanager
 import torch.cuda
 from torch import Tensor
 
-from .seed_manager import SeedManager
 from ..parallel_mode import ParallelMode
+from .seed_manager import SeedManager
 
 _SEED_MANAGER = SeedManager()
@@ -53,11 +53,11 @@ def add_seed(parallel_mode: ParallelMode, seed: int, overwrite: bool = False):
     """Adds a seed to the seed manager for `parallel_mode`.
 
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         seed (int): The seed to be added
 
     Raises:
         AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of
-            :class:`colossalai.context.ParallelMode` or the seed for `parallel_mode` has been added.
+            :class:`colossalai.legacy.context.ParallelMode` or the seed for `parallel_mode` has been added.
 
     Note:
         The parallel_mode should be concluded in ``ParallelMode``. More details about ``ParallelMode`` could be found
@@ -70,7 +70,7 @@ def set_mode(parallel_mode: ParallelMode):
     """Sets the current mode of the seed manager.
 
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
 
     Note:
         The parallel_mode should be concluded in ``ParallelMode``. More details about ``ParallelMode`` could be found
@@ -83,7 +83,7 @@ def set_seed_states(parallel_mode: ParallelMode, state: Tensor):
    """Sets the state of the seed manager for `parallel_mode`.
 
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         state (:class:`torch.Tensor`): the state to be set.
 
     Raises:
@@ -161,7 +161,7 @@ def with_seed(func, parallel_mode: ParallelMode):
 def moe_set_seed(seed):
     if torch.cuda.is_available():
-        from colossalai.core import global_context as gpc
+        from colossalai.legacy.core import global_context as gpc
         global_rank = gpc.get_global_rank()
         diff_seed = seed + global_rank
         add_seed(ParallelMode.TENSOR, diff_seed, True)
......
@@ -4,7 +4,7 @@
 import torch
 from torch import Tensor
 
-from colossalai.context.parallel_mode import ParallelMode
+from colossalai.legacy.context.parallel_mode import ParallelMode
 
 
 class SeedManager:
@@ -36,7 +36,7 @@ class SeedManager:
         """Sets the state of the seed manager for `parallel_mode`.
 
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
             state (:class:`torch.Tensor`): the state to be set.
 
         Raises:
@@ -49,7 +49,7 @@ class SeedManager:
         """Sets the current mode of the seed manager.
 
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         """
         if self.current_mode:
             # save the current state for current mode
@@ -63,12 +63,12 @@ class SeedManager:
         """Adds a seed to the seed manager for `parallel_mode`.
 
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
             seed (int): The seed to be added.
             overwrite (bool, optional): Whether allows to overwrite the seed that has been set already
 
         Raises:
-            AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of :class:`colossalai.context.ParallelMode`
+            AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of :class:`colossalai.legacy.context.ParallelMode`
                 or the seed for `parallel_mode` has been added.
         """
         assert isinstance(parallel_mode, ParallelMode), 'A valid ParallelMode must be provided'
......
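The seed-management helpers keep their signatures; only the `ParallelMode` documented in the docstrings moves under the legacy namespace. A minimal usage sketch against the updated paths (the module path `colossalai.legacy.context.random` for `add_seed`/`set_mode` is an assumption based on the hunks above):

```python
from colossalai.legacy.context.parallel_mode import ParallelMode
from colossalai.legacy.context.random import add_seed, set_mode  # module path assumed

add_seed(ParallelMode.DATA, 1024)  # register a seed for the data-parallel RNG state
set_mode(ParallelMode.DATA)        # make it the active mode for subsequent RNG calls
```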
 #!/usr/bin/env python
 # -*- encoding: utf-8 -*-
 
-parallel = dict(
-    pipeline=dict(size=2),
-    tensor=dict(
-        size=4,
-        mode='2d'
-    )
-)
+from colossalai.legacy.context.parallel_context import global_context
+
+__all__ = ['global_context']
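After this change, the legacy global parallel context is re-exported from the legacy core module, so the import used elsewhere in this diff keeps working. A small sketch (only calls that already appear in the hunks above):

```python
from colossalai.legacy.core import global_context as gpc

# gpc is the re-exported legacy parallel context; get_global_rank()
# is the same call used by moe_set_seed in the hunk above.
rank = gpc.get_global_rank()
```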
@@ -8,6 +8,7 @@ from torch import Tensor
 from torch.nn import Module
 from torch.nn.modules.loss import _Loss
 
+from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.engine.gradient_handler import BaseGradientHandler
 from colossalai.legacy.engine.schedule import (
     BaseSchedule,
@@ -15,9 +16,8 @@ from colossalai.legacy.engine.schedule import (
     NonPipelineSchedule,
     PipelineSchedule,
 )
+from colossalai.legacy.zero.gemini import BaseOpHook, register_ophooks_recursively
 from colossalai.logging import get_dist_logger
-from colossalai.nn.optimizer import ColossalaiOptimizer
-from colossalai.zero.legacy.gemini import BaseOpHook, register_ophooks_recursively
 
 
 class Engine:
@@ -27,7 +27,7 @@ class Engine:
     Args:
         model (``torch.nn.Module``): The neural network model.
-        optimizer (``colossalai.nn.optimizer.ColossalaiOptimizer``): Optimizer for updating the parameters.
+        optimizer (``colossalai.interface.OptimizerWrapper``): Optimizer for updating the parameters.
         criterion (``torch.nn.modules.loss._Loss``, optional): Loss function for calculating loss.
         gradient_handlers (List[``BaseGradientHandler``], optional): A list of gradient handler used in backward.
         clip_grad_norm (float, optional): The norm of gradient clipping.
@@ -61,7 +61,7 @@ class Engine:
     def __init__(self,
                  model: Module,
-                 optimizer: "ColossalaiOptimizer",
+                 optimizer: "OptimizerWrapper",
                  criterion: Optional[_Loss] = None,
                  gradient_handlers: Optional[List[BaseGradientHandler]] = None,
                  clip_grad_norm: float = 0.0,
@@ -157,7 +157,7 @@ class Engine:
         """Execute parameter update
         """
         self._all_reduce_gradients()
-        self.optimizer.clip_grad_norm(self.model, self._clip_grad_norm)
+        self.optimizer.clip_grad_by_norm(self._clip_grad_norm)
         return self.optimizer.step()
 
     def backward(self, loss: Tensor):
......
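The engine now expects an `OptimizerWrapper` and calls `clip_grad_by_norm(max_norm)` on it instead of the old `ColossalaiOptimizer.clip_grad_norm(model, max_norm)`. A minimal sketch of what that means for caller code (the SGD setup is illustrative, and wrapping a plain torch optimizer directly in `OptimizerWrapper` is an assumption about its constructor):

```python
import torch
from colossalai.interface import OptimizerWrapper

model = torch.nn.Linear(4, 4)
optimizer = OptimizerWrapper(torch.optim.SGD(model.parameters(), lr=0.1))  # constructor assumed

# old API (removed): optimizer.clip_grad_norm(model, max_norm=1.0)
# new API used by Engine.step() in this PR:
optimizer.clip_grad_by_norm(max_norm=1.0)
optimizer.step()
```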
@@ -10,12 +10,12 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.data import DataLoader
 
+from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.engine import BaseGradientHandler
-from colossalai.nn.optimizer import ColossalaiOptimizer
 from colossalai.utils import conditional_context
 
 
-class GradAccumOptimizer(ColossalaiOptimizer):
+class GradAccumOptimizer(OptimizerWrapper):
     """A wrapper for the optimizer to enable gradient accumulation by skipping the steps
     before accumulation size is reached.
@@ -74,7 +74,7 @@ class GradAccumOptimizer(ColossalaiOptimizer):
         if self.accumulate_step < self.accumulate_size:
             pass
         else:
-            self.optim.clip_grad_norm(model, max_norm)
+            self.optim.clip_grad_by_norm(max_norm)
 
     def backward(self, loss: Tensor) -> None:
         """Execute backward pass.
......
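`GradAccumOptimizer` now inherits from `OptimizerWrapper` and delegates clipping to the inner optimizer's `clip_grad_by_norm` only once the accumulation boundary is reached. A rough usage sketch; the import path and the constructor arguments (`accumulate_size`, `model`) are assumptions to be checked against the module:

```python
import torch
from colossalai.legacy.engine.gradient_accumulation import GradAccumOptimizer  # path assumed

model = torch.nn.Linear(4, 4)
base_optim = torch.optim.SGD(model.parameters(), lr=0.1)
optim = GradAccumOptimizer(base_optim, accumulate_size=4, model=model)  # signature assumed

for _ in range(4):
    loss = model(torch.randn(2, 4)).sum()
    optim.backward(loss)  # loss is scaled by the accumulation size internally
    optim.step()          # skipped until the 4th call, which applies the accumulated update
```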
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 
 from ._base_gradient_handler import BaseGradientHandler
......
 from colossalai.context.moe_context import MOE_CONTEXT
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 from colossalai.utils.moe import get_moe_epsize_param_dict
......
@@ -6,7 +6,7 @@ import torch
 import torch.distributed as dist
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
 
-from colossalai.core import global_context as gpc
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 
 from ._base_gradient_handler import BaseGradientHandler
......
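The gradient handlers themselves are unchanged apart from these import paths; they still subclass `BaseGradientHandler` and register with the `GRADIENT_HANDLER` registry. A hedged sketch of a custom handler against the new paths (the `register_module` decorator and the `handle_gradient` hook follow the pattern of the existing handlers; treat them as assumptions when adapting this):

```python
from colossalai.legacy.engine.gradient_handler import BaseGradientHandler
from colossalai.legacy.registry import GRADIENT_HANDLER


@GRADIENT_HANDLER.register_module
class NoOpGradientHandler(BaseGradientHandler):
    """Hypothetical handler that does nothing; shown only to illustrate the updated imports."""

    def handle_gradient(self):
        # a real handler would all-reduce the model's gradients across a process group here
        pass
```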