Commit 8accecd5 authored by Hongxin Liu
Browse files

[legacy] move engine to legacy (#4560)

* [legacy] move engine to legacy

* [example] fix seq parallel example

* [example] fix seq parallel example

* [test] test gemini plugin hang

* [test] test gemini plugin hang

* [test] test gemini plugin hang

* [test] test gemini plugin hang

* [test] test gemini plugin hang

* [example] update seq parallel requirements
parent 89fe0277
...@@ -71,7 +71,7 @@ def build_gradient_handler(config, model, optimizer): ...@@ -71,7 +71,7 @@ def build_gradient_handler(config, model, optimizer):
optimizer (:class:`torch.optim.Optimizer`): An optimizer object containing parameters for the gradient handler optimizer (:class:`torch.optim.Optimizer`): An optimizer object containing parameters for the gradient handler
Returns: Returns:
An object of :class:`colossalai.engine.BaseGradientHandler` An object of :class:`colossalai.legacy.engine.BaseGradientHandler`
""" """
config_ = config.copy() config_ = config.copy()
config_['model'] = model config_['model'] = model
......
...@@ -21,9 +21,9 @@ from colossalai.builder.builder import build_gradient_handler ...@@ -21,9 +21,9 @@ from colossalai.builder.builder import build_gradient_handler
from colossalai.context import Config, ConfigException, ParallelMode from colossalai.context import Config, ConfigException, ParallelMode
from colossalai.context.moe_context import MOE_CONTEXT from colossalai.context.moe_context import MOE_CONTEXT
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.engine import Engine from colossalai.legacy.engine import Engine
from colossalai.engine.gradient_accumulation import accumulate_gradient from colossalai.legacy.engine.gradient_accumulation import accumulate_gradient
from colossalai.engine.schedule import ( from colossalai.legacy.engine.schedule import (
InterleavedPipelineSchedule, InterleavedPipelineSchedule,
NonPipelineSchedule, NonPipelineSchedule,
PipelineSchedule, PipelineSchedule,
......
...@@ -8,11 +8,17 @@ from torch import Tensor ...@@ -8,11 +8,17 @@ from torch import Tensor
from torch.nn import Module from torch.nn import Module
from torch.nn.modules.loss import _Loss from torch.nn.modules.loss import _Loss
from colossalai.engine.gradient_handler import BaseGradientHandler from colossalai.legacy.engine.gradient_handler import BaseGradientHandler
from colossalai.engine.schedule import BaseSchedule, InterleavedPipelineSchedule, NonPipelineSchedule, PipelineSchedule from colossalai.legacy.engine.schedule import (
BaseSchedule,
InterleavedPipelineSchedule,
NonPipelineSchedule,
PipelineSchedule,
)
from colossalai.logging import get_dist_logger from colossalai.logging import get_dist_logger
from colossalai.zero.legacy.gemini import BaseOpHook, register_ophooks_recursively
from colossalai.nn.optimizer import ColossalaiOptimizer from colossalai.nn.optimizer import ColossalaiOptimizer
from colossalai.zero.legacy.gemini import BaseOpHook, register_ophooks_recursively
class Engine: class Engine:
"""Basic engine class for training and evaluation. It runs a specific process method """Basic engine class for training and evaluation. It runs a specific process method
......
...@@ -4,7 +4,7 @@ import torch.nn as nn ...@@ -4,7 +4,7 @@ import torch.nn as nn
from torch.optim import Optimizer from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler from torch.optim.lr_scheduler import _LRScheduler
from colossalai.engine import BaseGradientHandler from colossalai.legacy.engine import BaseGradientHandler
from ._gradient_accumulation import ( from ._gradient_accumulation import (
GradAccumDataloader, GradAccumDataloader,
...@@ -33,7 +33,7 @@ def accumulate_gradient(model: nn.Module, ...@@ -33,7 +33,7 @@ def accumulate_gradient(model: nn.Module,
dataloader (:class:`torch.utils.data.DataLoader` or iterable objects): dataloader (:class:`torch.utils.data.DataLoader` or iterable objects):
your dataloader object, would be called like iter(dataloader) your dataloader object, would be called like iter(dataloader)
accumulate_size (int): the number of steps to accumulate gradients accumulate_size (int): the number of steps to accumulate gradients
gradient_handlers (List[:class:`colossalai.engine.BaseGradientHandler`]): gradient_handlers (List[:class:`colossalai.legacy.engine.BaseGradientHandler`]):
list of gradient handler objects. Default is None. list of gradient handler objects. Default is None.
lr_scheduler (`torch.optim.lr_scheduler` or `colossalai.nn.lr_scheduler`): lr_scheduler (`torch.optim.lr_scheduler` or `colossalai.nn.lr_scheduler`):
your ``lr_scheduler`` object for gradient accumulation. Defaults to None. your ``lr_scheduler`` object for gradient accumulation. Defaults to None.
......
...@@ -10,7 +10,7 @@ from torch.optim import Optimizer ...@@ -10,7 +10,7 @@ from torch.optim import Optimizer
from torch.optim.lr_scheduler import _LRScheduler from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from colossalai.engine import BaseGradientHandler from colossalai.legacy.engine import BaseGradientHandler
from colossalai.nn.optimizer import ColossalaiOptimizer from colossalai.nn.optimizer import ColossalaiOptimizer
from colossalai.utils import conditional_context from colossalai.utils import conditional_context
...@@ -262,7 +262,7 @@ class GradAccumGradientHandler: ...@@ -262,7 +262,7 @@ class GradAccumGradientHandler:
before accumulation size is reached. before accumulation size is reached.
Args: Args:
grad_handler (:class:`colossalai.engine.BaseGradientHandler`): grad_handler (:class:`colossalai.legacy.engine.BaseGradientHandler`):
Your ``gradient_handler`` object for gradient accumulation, would be called when achieving `accumulate_size`. Your ``gradient_handler`` object for gradient accumulation, would be called when achieving `accumulate_size`.
accumulate_size (int): The number of steps to accumulate gradients. accumulate_size (int): The number of steps to accumulate gradients.
......
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER from colossalai.registry import GRADIENT_HANDLER
from ...context.parallel_mode import ParallelMode
from ._base_gradient_handler import BaseGradientHandler from ._base_gradient_handler import BaseGradientHandler
from .utils import bucket_allreduce from .utils import bucket_allreduce
......
from colossalai.context.moe_context import MOE_CONTEXT from colossalai.context.moe_context import MOE_CONTEXT
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER from colossalai.registry import GRADIENT_HANDLER
from colossalai.utils.moe import get_moe_epsize_param_dict from colossalai.utils.moe import get_moe_epsize_param_dict
from ...context.parallel_mode import ParallelMode
from ._base_gradient_handler import BaseGradientHandler from ._base_gradient_handler import BaseGradientHandler
from .utils import bucket_allreduce from .utils import bucket_allreduce
......
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc from colossalai.core import global_context as gpc
from colossalai.registry import GRADIENT_HANDLER from colossalai.registry import GRADIENT_HANDLER
from ...context.parallel_mode import ParallelMode
from ._base_gradient_handler import BaseGradientHandler from ._base_gradient_handler import BaseGradientHandler
from .utils import bucket_allreduce from .utils import bucket_allreduce
......
...@@ -95,7 +95,7 @@ class BaseSchedule(ABC): ...@@ -95,7 +95,7 @@ class BaseSchedule(ABC):
"""The process function over a batch of dataset for training or evaluation. """The process function over a batch of dataset for training or evaluation.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
data_iter (Iterable): Data iterator from which get a batch of data, obtained by calling iter(dataloader). data_iter (Iterable): Data iterator from which get a batch of data, obtained by calling iter(dataloader).
forward_only (bool): If True, the process won't include backward. forward_only (bool): If True, the process won't include backward.
return_loss (bool, optional): If False, the loss won't be returned. return_loss (bool, optional): If False, the loss won't be returned.
......
...@@ -54,7 +54,7 @@ class NonPipelineSchedule(BaseSchedule): ...@@ -54,7 +54,7 @@ class NonPipelineSchedule(BaseSchedule):
The returned labels and loss will None if :attr:`return_loss` is False. The returned labels and loss will None if :attr:`return_loss` is False.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader). data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
forward_only (bool, optional): forward_only (bool, optional):
If True, the model is run for the forward pass, else back propagation will be executed. If True, the model is run for the forward pass, else back propagation will be executed.
......
...@@ -236,7 +236,7 @@ class PipelineSchedule(BaseSchedule): ...@@ -236,7 +236,7 @@ class PipelineSchedule(BaseSchedule):
Returns output tensor. This is a helper function and can be ignored by users. Returns output tensor. This is a helper function and can be ignored by users.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage. input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage.
return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return. return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
return_output_label (bool, optional): Whether returns output labels. return_output_label (bool, optional): Whether returns output labels.
...@@ -274,7 +274,7 @@ class PipelineSchedule(BaseSchedule): ...@@ -274,7 +274,7 @@ class PipelineSchedule(BaseSchedule):
This is a helper function and can be ignored by users. This is a helper function and can be ignored by users.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): input tensor for this pipeline stage. input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): input tensor for this pipeline stage.
output_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): output tensor for this pipeline stage. output_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): output tensor for this pipeline stage.
output_obj_grad (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): gradient of output tensor for this pipeline stage. output_obj_grad (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): gradient of output tensor for this pipeline stage.
...@@ -314,7 +314,7 @@ class PipelineSchedule(BaseSchedule): ...@@ -314,7 +314,7 @@ class PipelineSchedule(BaseSchedule):
Returns a tuple with losses if the last stage, an empty tuple otherwise. Returns a tuple with losses if the last stage, an empty tuple otherwise.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader). data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
forward_only (bool, optional): forward_only (bool, optional):
Whether run forward step only. Default is false. If true, no backward will be run. Whether run forward step only. Default is false. If true, no backward will be run.
...@@ -518,7 +518,7 @@ class InterleavedPipelineSchedule(PipelineSchedule): ...@@ -518,7 +518,7 @@ class InterleavedPipelineSchedule(PipelineSchedule):
Returns output tensor. This is a helper function and can be ignored by users. Returns output tensor. This is a helper function and can be ignored by users.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
model_chunk_id (int): The id of model chunks. model_chunk_id (int): The id of model chunks.
input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage. input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage.
return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return. return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
...@@ -555,7 +555,7 @@ class InterleavedPipelineSchedule(PipelineSchedule): ...@@ -555,7 +555,7 @@ class InterleavedPipelineSchedule(PipelineSchedule):
communication between pipeline stages as needed. communication between pipeline stages as needed.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader). data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
forward_only (bool, optional): forward_only (bool, optional):
Whether run forward step only. Default is false. If true, no backward will be run. Whether run forward step only. Default is false. If true, no backward will be run.
......
...@@ -69,7 +69,7 @@ class PipelineScheduleV2(PipelineSchedule): ...@@ -69,7 +69,7 @@ class PipelineScheduleV2(PipelineSchedule):
Returns a tuple with losses if the last stage, an empty tuple otherwise. Returns a tuple with losses if the last stage, an empty tuple otherwise.
Args: Args:
engine (colossalai.engine.Engine): Colossalai engine for training and inference. engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader). data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
forward_only (bool, optional): forward_only (bool, optional):
Whether run forward step only. Default is false. If true, no backward will be run. Whether run forward step only. Default is false. If true, no backward will be run.
......
...@@ -4,7 +4,7 @@ import torch ...@@ -4,7 +4,7 @@ import torch
from torch.utils.data import DataLoader from torch.utils.data import DataLoader
from tqdm import tqdm from tqdm import tqdm
from colossalai.engine import Engine from colossalai.legacy.engine import Engine
from colossalai.legacy.trainer.hooks import BaseHook from colossalai.legacy.trainer.hooks import BaseHook
from colossalai.logging import DistributedLogger from colossalai.logging import DistributedLogger
from colossalai.utils import MultiTimer, is_dp_rank_0, is_no_pp_or_last_stage, is_tp_rank_0 from colossalai.utils import MultiTimer, is_dp_rank_0, is_no_pp_or_last_stage, is_tp_rank_0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment