OpenDAS / ColossalAI · Commits

Commit fae6c92e (unverified), authored Sep 05, 2023 by Hongxin Liu, committed by GitHub on Sep 05, 2023

Merge branch 'main' into feature/shardformer

Parents: bd186784, ac178ca5
Changes: 113 changed files in this commit. This page shows 20 changed files with 33 additions and 29 deletions (+33 / -29); the rest of the diff is split across further pages.
Files changed on this page:

colossalai/legacy/engine/gradient_accumulation/__init__.py (+2 / -2)
colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py (+2 / -2)
colossalai/legacy/engine/gradient_handler/__init__.py (+0 / -0)
colossalai/legacy/engine/gradient_handler/_base_gradient_handler.py (+0 / -0)
colossalai/legacy/engine/gradient_handler/_data_parallel_gradient_handler.py (+2 / -2)
colossalai/legacy/engine/gradient_handler/_moe_gradient_handler.py (+2 / -2)
colossalai/legacy/engine/gradient_handler/_pipeline_parallel_gradient_handler.py (+1 / -1)
colossalai/legacy/engine/gradient_handler/_sequence_parallel_gradient_handler.py (+2 / -2)
colossalai/legacy/engine/gradient_handler/_zero_gradient_handler.py (+1 / -1)
colossalai/legacy/engine/gradient_handler/utils.py (+0 / -0)
colossalai/legacy/engine/schedule/__init__.py (+0 / -0)
colossalai/legacy/engine/schedule/_base_schedule.py (+1 / -1)
colossalai/legacy/engine/schedule/_non_pipeline_schedule.py (+1 / -1)
colossalai/legacy/engine/schedule/_pipeline_schedule.py (+5 / -5)
colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py (+1 / -1)
colossalai/legacy/registry/__init__.py (+0 / -0)
colossalai/legacy/registry/registry.py (+2 / -2)
colossalai/legacy/trainer/__init__.py (+0 / -0)
colossalai/legacy/trainer/_trainer.py (+4 / -5)
colossalai/legacy/trainer/hooks/__init__.py (+7 / -2)
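Every file on this page is a move from colossalai/ into the new colossalai/legacy/ namespace; where content changes at all, it is confined to updating import paths and docstring references to the legacy locations, plus minor formatting. As a rough sketch of the corresponding change on the caller side, assuming the public names themselves are unchanged under colossalai.legacy (which is what the hunks below show for these modules):

# Import paths before this commit (now removed):
#   from colossalai.engine import BaseGradientHandler, Engine
#   from colossalai.registry import GRADIENT_HANDLER
#   from colossalai.trainer.hooks import BaseHook

# Equivalent imports after this commit:
from colossalai.legacy.engine import BaseGradientHandler, Engine
from colossalai.legacy.registry import GRADIENT_HANDLER
from colossalai.legacy.trainer.hooks import BaseHook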
colossalai/engine/gradient_accumulation/__init__.py → colossalai/legacy/engine/gradient_accumulation/__init__.py

@@ -4,7 +4,7 @@ import torch.nn as nn
 from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
-from colossalai.engine import BaseGradientHandler
+from colossalai.legacy.engine import BaseGradientHandler
 from ._gradient_accumulation import (
     GradAccumDataloader,

@@ -33,7 +33,7 @@ def accumulate_gradient(model: nn.Module,
         dataloader (:class:`torch.utils.data.DataLoader` or iterable objects):
             your dataloader object, would be called like iter(dataloader)
         accumulate_size (int): the number of steps to accumulate gradients
-        gradient_handlers (List[:class:`colossalai.engine.BaseGradientHandler`]):
+        gradient_handlers (List[:class:`colossalai.legacy.engine.BaseGradientHandler`]):
             list of gradient handler objects. Default is None.
         lr_scheduler (`torch.optim.lr_scheduler` or `colossalai.nn.lr_scheduler`):
             your ``lr_scheduler`` object for gradient accumulation. Defaults to None.
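In this file the only functional change is the BaseGradientHandler import; the second hunk just updates a docstring reference. For orientation, a hedged sketch of calling accumulate_gradient, built from the parameters listed in that docstring; the optimizer argument and the exact return shape are not visible in the hunks, so treat them as assumptions:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Path as of this commit; previously colossalai.engine.gradient_accumulation.
from colossalai.legacy.engine.gradient_accumulation import accumulate_gradient

model = nn.Linear(16, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
dataset = TensorDataset(torch.randn(64, 16), torch.randint(0, 2, (64,)))
dataloader = DataLoader(dataset, batch_size=8)

# Wrap the training objects so an optimizer step happens every 4 micro-batches.
# Assumption: the wrapped objects are returned in this order.
optimizer, dataloader, gradient_handlers, lr_scheduler = accumulate_gradient(
    model=model,
    optimizer=optimizer,
    dataloader=dataloader,
    accumulate_size=4,
)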
colossalai/engine/gradient_accumulation/_gradient_accumulation.py → colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py

@@ -10,7 +10,7 @@ from torch.optim import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
 from torch.utils.data import DataLoader
-from colossalai.engine import BaseGradientHandler
+from colossalai.legacy.engine import BaseGradientHandler
 from colossalai.nn.optimizer import ColossalaiOptimizer
 from colossalai.utils import conditional_context

@@ -262,7 +262,7 @@ class GradAccumGradientHandler:
     before accumulation size is reached.

     Args:
-        grad_handler (:class:`colossalai.engine.BaseGradientHandler`):
+        grad_handler (:class:`colossalai.legacy.engine.BaseGradientHandler`):
            Your ``gradient_handler`` object for gradient accumulation, would be called when achieving `accumulate_size`.
         accumulate_size (int): The number of steps to accumulate gradients.
colossalai/engine/gradient_handler/__init__.py → colossalai/legacy/engine/gradient_handler/__init__.py (file moved)

colossalai/engine/gradient_handler/_base_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_base_gradient_handler.py (file moved)
colossalai/engine/gradient_handler/_data_parallel_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_data_parallel_gradient_handler.py

-from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.registry import GRADIENT_HANDLER
+from colossalai.legacy.registry import GRADIENT_HANDLER
+from ...context.parallel_mode import ParallelMode
 from ._base_gradient_handler import BaseGradientHandler
 from .utils import bucket_allreduce
colossalai/engine/gradient_handler/_moe_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_moe_gradient_handler.py

 from colossalai.context.moe_context import MOE_CONTEXT
-from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.registry import GRADIENT_HANDLER
+from colossalai.legacy.registry import GRADIENT_HANDLER
 from colossalai.utils.moe import get_moe_epsize_param_dict
+from ...context.parallel_mode import ParallelMode
 from ._base_gradient_handler import BaseGradientHandler
 from .utils import bucket_allreduce
colossalai/engine/gradient_handler/_pipeline_parallel_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_pipeline_parallel_gradient_handler.py

@@ -7,7 +7,7 @@ import torch.distributed as dist
 from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
 from colossalai.core import global_context as gpc
-from colossalai.registry import GRADIENT_HANDLER
+from colossalai.legacy.registry import GRADIENT_HANDLER
 from ._base_gradient_handler import BaseGradientHandler
colossalai/engine/gradient_handler/_sequence_parallel_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_sequence_parallel_gradient_handler.py

-from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.registry import GRADIENT_HANDLER
+from colossalai.legacy.registry import GRADIENT_HANDLER
+from ...context.parallel_mode import ParallelMode
 from ._base_gradient_handler import BaseGradientHandler
 from .utils import bucket_allreduce
colossalai/engine/gradient_handler/_zero_gradient_handler.py → colossalai/legacy/engine/gradient_handler/_zero_gradient_handler.py

-from colossalai.registry import GRADIENT_HANDLER
+from colossalai.legacy.registry import GRADIENT_HANDLER
 from ._base_gradient_handler import BaseGradientHandler
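All five gradient-handler modules above receive the same one-line change: GRADIENT_HANDLER now comes from colossalai.legacy.registry instead of colossalai.registry. A minimal sketch of registering a custom handler against the moved registry, assuming the register_module decorator and BaseGradientHandler's handle_gradient interface are unchanged by the move; MyAllReduceHandler and its body are illustrative, not part of the commit:

import torch.distributed as dist

from colossalai.legacy.engine import BaseGradientHandler  # was: colossalai.engine
from colossalai.legacy.registry import GRADIENT_HANDLER   # was: colossalai.registry


@GRADIENT_HANDLER.register_module
class MyAllReduceHandler(BaseGradientHandler):
    """Illustrative handler: all-reduce every gradient over the default process group."""

    def handle_gradient(self):
        # BaseGradientHandler stores the model/optimizer passed to its constructor;
        # the attribute name below follows the usual handler implementations (assumption).
        for param in self._model.parameters():
            if param.grad is not None:
                dist.all_reduce(param.grad)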
colossalai/engine/gradient_handler/utils.py → colossalai/legacy/engine/gradient_handler/utils.py (file moved)

colossalai/engine/schedule/__init__.py → colossalai/legacy/engine/schedule/__init__.py (file moved)
colossalai/engine/schedule/_base_schedule.py → colossalai/legacy/engine/schedule/_base_schedule.py

@@ -95,7 +95,7 @@ class BaseSchedule(ABC):
         """The process function over a batch of dataset for training or evaluation.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             data_iter (Iterable): Data iterator from which get a batch of data, obtained by calling iter(dataloader).
             forward_only (bool): If True, the process won't include backward.
             return_loss (bool, optional): If False, the loss won't be returned.
colossalai/engine/schedule/_non_pipeline_schedule.py → colossalai/legacy/engine/schedule/_non_pipeline_schedule.py

@@ -54,7 +54,7 @@ class NonPipelineSchedule(BaseSchedule):
         The returned labels and loss will None if :attr:`return_loss` is False.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
             forward_only (bool, optional):
                 If True, the model is run for the forward pass, else back propagation will be executed.
colossalai/engine/schedule/_pipeline_schedule.py → colossalai/legacy/engine/schedule/_pipeline_schedule.py

@@ -236,7 +236,7 @@ class PipelineSchedule(BaseSchedule):
         Returns output tensor. This is a helper function and can be ignored by users.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage.
             return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.
             return_output_label (bool, optional): Whether returns output labels.

@@ -274,7 +274,7 @@ class PipelineSchedule(BaseSchedule):
         This is a helper function and can be ignored by users.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): input tensor for this pipeline stage.
             output_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): output tensor for this pipeline stage.
             output_obj_grad (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): gradient of output tensor for this pipeline stage.

@@ -314,7 +314,7 @@ class PipelineSchedule(BaseSchedule):
         Returns a tuple with losses if the last stage, an empty tuple otherwise.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
             forward_only (bool, optional):
                 Whether run forward step only. Default is false. If true, no backward will be run.

@@ -518,7 +518,7 @@ class InterleavedPipelineSchedule(PipelineSchedule):
         Returns output tensor. This is a helper function and can be ignored by users.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             model_chunk_id (int): The id of model chunks.
             input_obj (Union[:class:`torch.Tensor`, List[:class:`torch.Tensor`]]): Input tensor for this pipeline stage.
             return_tensors (List[:class:`torch.Tensor`]): A list of tensors to return.

@@ -555,7 +555,7 @@ class InterleavedPipelineSchedule(PipelineSchedule):
         communication between pipeline stages as needed.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
             forward_only (bool, optional):
                 Whether run forward step only. Default is false. If true, no backward will be run.
colossalai/engine/schedule/_pipeline_schedule_v2.py → colossalai/legacy/engine/schedule/_pipeline_schedule_v2.py

@@ -69,7 +69,7 @@ class PipelineScheduleV2(PipelineSchedule):
         Returns a tuple with losses if the last stage, an empty tuple otherwise.

         Args:
-            engine (colossalai.engine.Engine): Colossalai engine for training and inference.
+            engine (colossalai.legacy.engine.Engine): Colossalai engine for training and inference.
             data_iter (Iterable): Dataloader as the form of an iterator, obtained by calling iter(dataloader).
             forward_only (bool, optional):
                 Whether run forward step only. Default is false. If true, no backward will be run.
colossalai/registry/__init__.py → colossalai/legacy/registry/__init__.py (file moved)
colossalai/registry/registry.py → colossalai/legacy/registry/registry.py

(The removed and added docstring lines in both hunks render identically here; the change appears to be whitespace-only.)

@@ -6,7 +6,7 @@ from typing import List
 class Registry:
-    """This is a registry class used to register classes and modules so that a universal
+    """This is a registry class used to register classes and modules so that a universal
     object builder can be enabled.

     Args:

@@ -42,7 +42,7 @@ class Registry:
             return module_class

     def get_module(self, module_name: str):
-        """Retrieves a module with name `module_name` and returns the module if it has
+        """Retrieves a module with name `module_name` and returns the module if it has
         already been registered before.

         Args:
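Apart from its new package path, the Registry class is functionally untouched here. A small sketch of the register/look-up round trip it provides; get_module appears in the hunk above, while the register_module decorator, the OPTIMIZERS registry, and the MySGD class are assumptions made for illustration:

from colossalai.legacy.registry.registry import Registry  # moved from colossalai/registry/registry.py

# A throwaway registry for illustration; the name is arbitrary.
OPTIMIZERS = Registry("optimizers")


@OPTIMIZERS.register_module  # assumed decorator API; registers the class under its own name
class MySGD:
    def __init__(self, lr: float = 0.1):
        self.lr = lr


# get_module returns the class registered under `module_name`,
# which is what enables string-based configuration.
optimizer_cls = OPTIMIZERS.get_module("MySGD")
optimizer = optimizer_cls(lr=0.01)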
colossalai/trainer/__init__.py → colossalai/legacy/trainer/__init__.py (file moved)
colossalai/trainer/_trainer.py → colossalai/legacy/trainer/_trainer.py

-from typing import Union, List, Any
+from typing import Any, List, Union
 import torch
 from torch.utils.data import DataLoader
 from tqdm import tqdm
-from colossalai.engine import Engine
+from colossalai.legacy.engine import Engine
+from colossalai.legacy.trainer.hooks import BaseHook
 from colossalai.logging import DistributedLogger
-from colossalai.utils import MultiTimer
-from colossalai.utils import is_dp_rank_0, is_tp_rank_0, is_no_pp_or_last_stage
-from colossalai.trainer.hooks import BaseHook
+from colossalai.utils import MultiTimer, is_dp_rank_0, is_no_pp_or_last_stage, is_tp_rank_0

 class Trainer:
colossalai/trainer/hooks/__init__.py → colossalai/legacy/trainer/hooks/__init__.py

 from ._base_hook import BaseHook
 from ._checkpoint_hook import SaveCheckpointHook
-from ._log_hook import (LogMemoryByEpochHook, LogMetricByEpochHook, LogMetricByStepHook, LogTimingByEpochHook,
-                        TensorboardHook)
+from ._log_hook import (
+    LogMemoryByEpochHook,
+    LogMetricByEpochHook,
+    LogMetricByStepHook,
+    LogTimingByEpochHook,
+    TensorboardHook,
+)
 from ._lr_scheduler_hook import LRSchedulerHook
 from ._metric_hook import AccuracyHook, LossHook, MetricHook, ThroughputHook
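The trainer changes on this page are likewise import-path and import-formatting updates only. For context, a hedged sketch of what caller code looks like against the new paths; the Trainer and fit() arguments follow the usual legacy-trainer pattern rather than anything shown in these hunks, and engine, train_dataloader, and lr_scheduler are assumed to have been produced elsewhere (typically by colossalai.initialize):

from colossalai.legacy.trainer import Trainer                 # was: colossalai.trainer
from colossalai.legacy.trainer.hooks import (                 # was: colossalai.trainer.hooks
    LogMetricByEpochHook,
    LossHook,
    LRSchedulerHook,
)
from colossalai.logging import get_dist_logger
from colossalai.utils import MultiTimer

# engine, train_dataloader, lr_scheduler: placeholders assumed to come from
# colossalai.initialize(...) in an already-launched distributed run.
logger = get_dist_logger()
trainer = Trainer(engine=engine, timer=MultiTimer(), logger=logger)

hooks = [
    LossHook(),
    LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=True),
    LogMetricByEpochHook(logger),
]

trainer.fit(
    train_dataloader=train_dataloader,
    epochs=2,
    hooks=hooks,
    display_progress=True,
)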