"vscode:/vscode.git/clone" did not exist on "8f2c55f9c99012e4cbfefa422ab2e91dfece447e"
Unverified commit b5f9e37c, authored by Hongxin Liu and committed by GitHub

[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)

* [legacy] remove cli of benchmark and update optim (#4690)

* [legacy] remove cli of benchmark and update optim

* [doc] fix cli doc test

* [legacy] fix engine clip grad norm

* [legacy] remove outdated colo tensor (#4694)

* [legacy] remove outdated colo tensor

* [test] fix test import

* [legacy] move outdated zero to legacy (#4696)

* [legacy] clean up utils (#4700)

* [legacy] clean up utils

* [example] update examples

* [legacy] clean up amp

* [legacy] fix amp module

* [legacy] clean up gpc (#4742)

* [legacy] clean up context

* [legacy] clean core, constants and global vars

* [legacy] refactor initialize

* [example] fix examples ci

* [example] fix examples ci

* [legacy] fix tests

* [example] fix gpt example

* [example] fix examples ci

* [devops] fix ci installation

* [example] fix examples ci
parent 32e7f994
@@ -3,10 +3,10 @@ from .initializer_2d import Initializer_2D
 from .initializer_2p5d import Initializer_2p5D
 from .initializer_3d import Initializer_3D
 from .initializer_data import Initializer_Data
+from .initializer_model import Initializer_Model
 from .initializer_pipeline import Initializer_Pipeline
 from .initializer_sequence import Initializer_Sequence
 from .initializer_tensor import Initializer_Tensor
-from .initializer_model import Initializer_Model
 from .process_group_initializer import ProcessGroupInitializer
 __all__ = [

@@ -3,7 +3,7 @@
 import torch.distributed as dist
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 from ..parallel_mode import ParallelMode

@@ -2,7 +2,7 @@ import math
 import torch.distributed as dist
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 from ..parallel_mode import ParallelMode

@@ -6,7 +6,7 @@ import math
 import torch.distributed as dist
 from colossalai.context import Config
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 from ..parallel_mode import ParallelMode

@@ -5,7 +5,7 @@ import math
 import torch.distributed as dist
-from colossalai.global_variables import tensor_parallel_env as env
+from colossalai.legacy.global_variables import tensor_parallel_env as env
 from colossalai.legacy.registry import DIST_GROUP_INITIALIZER
 from ..parallel_mode import ParallelMode

@@ -7,8 +7,8 @@ from contextlib import contextmanager
 import torch.cuda
 from torch import Tensor
-from .seed_manager import SeedManager
 from ..parallel_mode import ParallelMode
+from .seed_manager import SeedManager
 _SEED_MANAGER = SeedManager()

@@ -53,11 +53,11 @@ def add_seed(parallel_mode: ParallelMode, seed: int, overwrite: bool = False):
     """Adds a seed to the seed manager for `parallel_mode`.
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         seed (int): The seed to be added
     Raises:
         AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of
-            :class:`colossalai.context.ParallelMode` or the seed for `parallel_mode` has been added.
+            :class:`colossalai.legacy.context.ParallelMode` or the seed for `parallel_mode` has been added.
     Note:
         The parallel_mode should be concluded in ``ParallelMode``. More details about ``ParallelMode`` could be found

@@ -70,7 +70,7 @@ def set_mode(parallel_mode: ParallelMode):
     """Sets the current mode of the seed manager.
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
     Note:
         The parallel_mode should be concluded in ``ParallelMode``. More details about ``ParallelMode`` could be found

@@ -83,7 +83,7 @@ def set_seed_states(parallel_mode: ParallelMode, state: Tensor):
     """Sets the state of the seed manager for `parallel_mode`.
     Args:
-        parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+        parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         state (:class:`torch.Tensor`): the state to be set.
     Raises:

@@ -161,7 +161,7 @@ def with_seed(func, parallel_mode: ParallelMode):
 def moe_set_seed(seed):
     if torch.cuda.is_available():
-        from colossalai.core import global_context as gpc
+        from colossalai.legacy.core import global_context as gpc
         global_rank = gpc.get_global_rank()
         diff_seed = seed + global_rank
         add_seed(ParallelMode.TENSOR, diff_seed, True)

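The hunks above only retarget the docstrings and the deferred gpc import to the colossalai.legacy namespace; the seed-management API itself (add_seed, set_mode, the seed context manager) is unchanged. A minimal usage sketch, assuming these helpers remain re-exported from colossalai.legacy.context.random after the move and that CUDA is available:

```python
# Sketch only: the colossalai.legacy.context.random export location is assumed,
# as is CUDA availability (the seed manager tracks CUDA RNG states).
import torch

from colossalai.legacy.context import ParallelMode
from colossalai.legacy.context.random import add_seed, seed, set_mode

add_seed(ParallelMode.DATA, 1024)      # register an RNG state for the data-parallel region
add_seed(ParallelMode.TENSOR, 1025)    # and a separate one for the tensor-parallel region

set_mode(ParallelMode.DATA)            # subsequent CUDA RNG calls draw from the DATA state

with seed(ParallelMode.TENSOR):        # temporarily switch to the TENSOR state
    w = torch.empty(16, 16, device='cuda').normal_()
```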
@@ -4,7 +4,7 @@
 import torch
 from torch import Tensor
-from colossalai.context.parallel_mode import ParallelMode
+from colossalai.legacy.context.parallel_mode import ParallelMode
 class SeedManager:

@@ -36,7 +36,7 @@ class SeedManager:
         """Sets the state of the seed manager for `parallel_mode`.
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
             state (:class:`torch.Tensor`): the state to be set.
         Raises:

@@ -49,7 +49,7 @@ class SeedManager:
         """Sets the current mode of the seed manager.
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
         """
         if self.current_mode:
             # save the current state for current mode

@@ -63,12 +63,12 @@ class SeedManager:
         """Adds a seed to the seed manager for `parallel_mode`.
         Args:
-            parallel_mode (:class:`colossalai.context.ParallelMode`): The chosen parallel mode.
+            parallel_mode (:class:`colossalai.legacy.context.ParallelMode`): The chosen parallel mode.
             seed (int): The seed to be added.
             overwrite (bool, optional): Whether allows to overwrite the seed that has been set already
         Raises:
-            AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of :class:`colossalai.context.ParallelMode`
+            AssertionError: Raises an AssertionError if `parallel_mode` is not an instance of :class:`colossalai.legacy.context.ParallelMode`
                 or the seed for `parallel_mode` has been added.
         """
         assert isinstance(parallel_mode, ParallelMode), 'A valid ParallelMode must be provided'

#!/usr/bin/env python
# -*- encoding: utf-8 -*-

parallel = dict(
    pipeline=dict(size=2),
    tensor=dict(
        size=4,
        mode='2d'
    )
)

from colossalai.legacy.context.parallel_context import global_context

__all__ = ['global_context']
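For reference, the config above describes a hybrid layout: pipeline parallel size 2 and 2D tensor parallel size 4 (a 2x2 grid), so each model replica spans 2 x 4 = 8 ranks and the data-parallel size is world_size // 8. A hedged sketch of how the legacy global context would consume it; the launch_from_torch entry point under colossalai.legacy and the config path are assumptions, not shown in this diff:

```python
# Sketch only: assumes colossalai.legacy keeps a launch_from_torch(config=...)
# entry point after the refactor and that the config above is saved at this path.
import colossalai.legacy as legacy
from colossalai.legacy.context import ParallelMode
from colossalai.legacy.core import global_context as gpc

legacy.launch_from_torch(config='./parallel_config.py')  # hypothetical config path

# pipeline=2 and tensor=4 (mode='2d') fix the per-replica layout;
# the data-parallel size is whatever is left of the world size.
assert gpc.get_world_size(ParallelMode.PIPELINE) == 2
assert gpc.get_world_size(ParallelMode.TENSOR) == 4
print('data parallel size:', gpc.get_world_size(ParallelMode.DATA))
```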
@@ -8,6 +8,7 @@ from torch import Tensor
 from torch.nn import Module
 from torch.nn.modules.loss import _Loss
+from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.engine.gradient_handler import BaseGradientHandler
 from colossalai.legacy.engine.schedule import (
     BaseSchedule,
@@ -15,9 +16,8 @@ from colossalai.legacy.engine.schedule import (
     NonPipelineSchedule,
     PipelineSchedule,
 )
+from colossalai.legacy.zero.gemini import BaseOpHook, register_ophooks_recursively
 from colossalai.logging import get_dist_logger
-from colossalai.nn.optimizer import ColossalaiOptimizer
-from colossalai.zero.legacy.gemini import BaseOpHook, register_ophooks_recursively
 class Engine:

@@ -27,7 +27,7 @@ class Engine:
     Args:
         model (``torch.nn.Module``): The neural network model.
-        optimizer (``colossalai.nn.optimizer.ColossalaiOptimizer``): Optimizer for updating the parameters.
+        optimizer (``colossalai.interface.OptimizerWrapper``): Optimizer for updating the parameters.
         criterion (``torch.nn.modules.loss._Loss``, optional): Loss function for calculating loss.
         gradient_handlers (List[``BaseGradientHandler``], optional): A list of gradient handler used in backward.
         clip_grad_norm (float, optional): The norm of gradient clipping.

@@ -61,7 +61,7 @@ class Engine:
     def __init__(self,
                  model: Module,
-                 optimizer: "ColossalaiOptimizer",
+                 optimizer: "OptimizerWrapper",
                  criterion: Optional[_Loss] = None,
                  gradient_handlers: Optional[List[BaseGradientHandler]] = None,
                  clip_grad_norm: float = 0.0,

@@ -157,7 +157,7 @@ class Engine:
         """Execute parameter update
         """
         self._all_reduce_gradients()
-        self.optimizer.clip_grad_norm(self.model, self._clip_grad_norm)
+        self.optimizer.clip_grad_by_norm(self._clip_grad_norm)
         return self.optimizer.step()
     def backward(self, loss: Tensor):

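The Engine changes swap the optimizer contract from ColossalaiOptimizer to colossalai.interface.OptimizerWrapper, and step() now clips through OptimizerWrapper.clip_grad_by_norm instead of the removed clip_grad_norm(model, max_norm). A minimal wiring sketch under those assumptions; in practice the Engine is normally produced by the legacy initialize helper inside a launched distributed job rather than constructed by hand:

```python
# Sketch only: Engine is assumed to remain exported from colossalai.legacy.engine,
# and the snippet presumes a launched distributed context (gpc initialized).
import torch
import torch.nn as nn

from colossalai.interface import OptimizerWrapper
from colossalai.legacy.engine import Engine

model = nn.Linear(32, 8)
optimizer = OptimizerWrapper(torch.optim.SGD(model.parameters(), lr=1e-2))
criterion = nn.CrossEntropyLoss()

engine = Engine(model=model,
                optimizer=optimizer,
                criterion=criterion,
                clip_grad_norm=1.0)   # step() now calls optimizer.clip_grad_by_norm(1.0)

out = engine(torch.randn(4, 32))               # Engine forwards the call to the model
loss = criterion(out, torch.randint(0, 8, (4,)))
engine.backward(loss)
engine.step()
```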
@@ -10,12 +10,12 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.data import DataLoader
+from colossalai.interface import OptimizerWrapper
 from colossalai.legacy.engine import BaseGradientHandler
-from colossalai.nn.optimizer import ColossalaiOptimizer
 from colossalai.utils import conditional_context
-class GradAccumOptimizer(ColossalaiOptimizer):
+class GradAccumOptimizer(OptimizerWrapper):
     """A wrapper for the optimizer to enable gradient accumulation by skipping the steps
     before accumulation size is reached.

@@ -74,7 +74,7 @@ class GradAccumOptimizer(ColossalaiOptimizer):
         if self.accumulate_step < self.accumulate_size:
             pass
         else:
-            self.optim.clip_grad_norm(model, max_norm)
+            self.optim.clip_grad_by_norm(max_norm)
     def backward(self, loss: Tensor) -> None:
         """Execute backward pass.

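GradAccumOptimizer follows the same migration: it now subclasses OptimizerWrapper and forwards clipping to self.optim.clip_grad_by_norm(max_norm). A hedged sketch of the accumulation behaviour; the import path and the constructor signature (optim, accumulate_size, model=...) are assumptions, not confirmed by this diff:

```python
# Sketch only: import path and constructor signature are assumed.
import torch
import torch.nn as nn

from colossalai.interface import OptimizerWrapper
from colossalai.legacy.utils.gradient_accumulation import GradAccumOptimizer

model = nn.Linear(16, 4)
base_optim = OptimizerWrapper(torch.optim.Adam(model.parameters(), lr=1e-3))
optim = GradAccumOptimizer(base_optim, accumulate_size=4, model=model)

for step in range(8):
    loss = model(torch.randn(2, 16)).sum()
    optim.backward(loss)   # accumulates; the real parameter update is deferred
    optim.step()           # no-op until 4 micro-steps have been accumulated
    optim.zero_grad()
```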
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 from ._base_gradient_handler import BaseGradientHandler

 from colossalai.context.moe_context import MOE_CONTEXT
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 from colossalai.utils.moe import get_moe_epsize_param_dict

@@ -6,7 +6,7 @@ import torch
 import torch.distributed as dist
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
-from colossalai.core import global_context as gpc
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.registry import GRADIENT_HANDLER
 from ._base_gradient_handler import BaseGradientHandler