Unverified Commit b5f9e37c authored by Hongxin Liu, committed by GitHub

[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)

* [legacy] remove cli of benchmark and update optim (#4690)

* [legacy] remove cli of benchmark and update optim

* [doc] fix cli doc test

* [legacy] fix engine clip grad norm

* [legacy] remove outdated colo tensor (#4694)

* [legacy] remove outdated colo tensor

* [test] fix test import

* [legacy] move outdated zero to legacy (#4696)

* [legacy] clean up utils (#4700)

* [legacy] clean up utils

* [example] update examples

* [legacy] clean up amp

* [legacy] fix amp module

* [legacy] clean up gpc (#4742)

* [legacy] clean up context

* [legacy] clean core, constants and global vars

* [legacy] refactor initialize

* [example] fix examples ci

* [example] fix examples ci

* [legacy] fix tests

* [example] fix gpt example

* [example] fix examples ci

* [devops] fix ci installation

* [example] fix examples ci
parent 32e7f994
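All of the hunks below follow the same mechanical renaming: modules that previously lived under `colossalai.zero.legacy`, `colossalai.context`, `colossalai.core`, and `colossalai.utils.memory` are now imported from the consolidated `colossalai.legacy` package. A minimal before/after sketch of that import change, using symbols that appear in the diffs (assuming only the package prefix moves and the symbol names stay the same):

```python
# Before this commit (old layout) -- paths taken from the removed import lines below.
# from colossalai.zero.legacy.sharded_model import ShardedModelV2
# from colossalai.context.parallel_mode import ParallelMode
# from colossalai.core import global_context as gpc

# After this commit (new layout) -- the same symbols under the colossalai.legacy package.
from colossalai.legacy.zero.sharded_model import ShardedModelV2
from colossalai.legacy.context.parallel_mode import ParallelMode
from colossalai.legacy.core import global_context as gpc
```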
@@ -5,9 +5,9 @@ from typing import List
 import torch
+from colossalai.legacy.zero.gemini.tensor_utils import alloc_storage, free_storage
 from colossalai.tensor.param_op_hook import ColoParamOpHook
 from colossalai.zero.gemini.memory_tracer import MemStats, SyncCudaMemoryMonitor
-from colossalai.zero.legacy.gemini.tensor_utils import alloc_storage, free_storage
 class TrainingPhase(Enum):
......
@@ -5,8 +5,8 @@ from typing import List, Optional, Type
 import torch
+from colossalai.legacy.utils.memory import colo_device_memory_capacity
 from colossalai.utils import get_current_device
-from colossalai.utils.memory import colo_device_memory_capacity
 from colossalai.zero.gemini.memory_tracer import MemStatsCollector
 from .stateful_tensor import StatefulTensor
......
@@ -8,15 +8,15 @@ import torch
 import torch.distributed as dist
 import torch.nn as nn
-from colossalai.context.parallel_mode import ParallelMode
 from colossalai.context.singleton_meta import SingletonMeta
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
+from colossalai.legacy.zero.shard_utils import BaseShardStrategy
+from colossalai.legacy.zero.sharded_model._utils import cast_tensor_to_bf16, cast_tensor_to_fp16
+from colossalai.legacy.zero.sharded_model.sharded_model_v2 import ShardedModelV2
+from colossalai.legacy.zero.sharded_param import ShardedParamV2
 from colossalai.logging import get_dist_logger
 from colossalai.utils.model.utils import InsertPostInitMethodToModuleSubClasses
-from colossalai.zero.legacy.shard_utils import BaseShardStrategy
-from colossalai.zero.legacy.sharded_model._utils import cast_tensor_to_bf16, cast_tensor_to_fp16
-from colossalai.zero.legacy.sharded_model.sharded_model_v2 import ShardedModelV2
-from colossalai.zero.legacy.sharded_param import ShardedParamV2
 @dataclass
......
@@ -3,7 +3,7 @@ from typing import List, Optional
 import torch.distributed as dist
-from colossalai.zero.legacy.sharded_param.sharded_tensor import ShardedTensor
+from colossalai.legacy.zero.sharded_param.sharded_tensor import ShardedTensor
 class BaseShardStrategy(ABC):
......
@@ -4,8 +4,8 @@ import torch
 import torch.distributed as dist
 from torch._utils import _flatten_dense_tensors as flatten
+from colossalai.legacy.zero.sharded_param.sharded_tensor import ShardedTensor
 from colossalai.utils import get_current_device
-from colossalai.zero.legacy.sharded_param.sharded_tensor import ShardedTensor
 from .tensor_shard_strategy import TensorShardStrategy
......
@@ -3,11 +3,11 @@ from typing import List, Optional
 import torch
 import torch.distributed as dist
+from colossalai.legacy.zero.gemini.tensor_utils import colo_model_data_tensor_move_inline
+from colossalai.legacy.zero.shard_utils import BaseShardStrategy
+from colossalai.legacy.zero.shard_utils.commons import get_shard
+from colossalai.legacy.zero.sharded_param.sharded_tensor import ShardedTensor
 from colossalai.utils import get_current_device
-from colossalai.zero.legacy.gemini.tensor_utils import colo_model_data_tensor_move_inline
-from colossalai.zero.legacy.shard_utils import BaseShardStrategy
-from colossalai.zero.legacy.shard_utils.commons import get_shard
-from colossalai.zero.legacy.sharded_param.sharded_tensor import ShardedTensor
 class TensorShardStrategy(BaseShardStrategy):
......
@@ -3,7 +3,7 @@ from typing import Any, Callable, List, Tuple, Union
 import torch
 import torch.nn.functional as F
-from colossalai.zero.legacy.gemini.stateful_tensor import StatefulTensor
+from colossalai.legacy.zero.gemini.stateful_tensor import StatefulTensor
 def get_gradient_predivide_factor(world_size: int) -> float:
......
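The hunk above ends at the signature of `get_gradient_predivide_factor`, whose body is not shown in this diff. As background, sharded models of this style usually split the `1/world_size` gradient normalization into a pre-divide before the reduction and a post-divide after it, which makes low-precision reduce-scatter/all-reduce less likely to overflow or underflow. A sketch of the typical fairscale-style computation follows; this is an assumption for illustration, not the verbatim body of the file in this commit:

```python
def get_gradient_predivide_factor(world_size: int) -> float:
    # Split world_size into a pre-divide factor (a power of two near its square
    # root that divides it) and the remaining post-divide factor.
    factor: int = 1
    while world_size % factor == 0 and world_size / factor > factor:
        factor *= 2
    return float(factor)
```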
@@ -11,20 +11,20 @@ import torch.nn as nn
 from torch.distributed import ProcessGroup
 from torch.nn.parameter import Parameter
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
+from colossalai.legacy.utils.memory import colo_device_memory_capacity
+from colossalai.legacy.zero.gemini.ophooks import register_ophooks_recursively
+from colossalai.legacy.zero.gemini.paramhooks import BaseParamHookMgr
+from colossalai.legacy.zero.gemini.stateful_tensor import TensorState
+from colossalai.legacy.zero.gemini.stateful_tensor_mgr import StatefulTensorMgr
+from colossalai.legacy.zero.gemini.tensor_placement_policy import TensorPlacementPolicy, TensorPlacementPolicyFactory
+from colossalai.legacy.zero.gemini.tensor_utils import colo_model_data_move_to_cpu
+from colossalai.legacy.zero.shard_utils import BaseShardStrategy
+from colossalai.legacy.zero.sharded_model.reduce_scatter import ReduceScatterBucketer
 from colossalai.logging import get_dist_logger
 from colossalai.utils import disposable, get_current_device
-from colossalai.utils.memory import colo_device_memory_capacity
-from colossalai.zero.gemini.memory_tracer import MemStatsCollector, StaticMemStatsCollector
-from colossalai.zero.legacy.gemini.ophooks import register_ophooks_recursively
-from colossalai.zero.legacy.gemini.paramhooks import BaseParamHookMgr
-from colossalai.zero.legacy.gemini.stateful_tensor import TensorState
-from colossalai.zero.legacy.gemini.stateful_tensor_mgr import StatefulTensorMgr
-from colossalai.zero.legacy.gemini.tensor_placement_policy import TensorPlacementPolicy, TensorPlacementPolicyFactory
-from colossalai.zero.legacy.gemini.tensor_utils import colo_model_data_move_to_cpu
-from colossalai.zero.legacy.shard_utils import BaseShardStrategy
-from colossalai.zero.legacy.sharded_model.reduce_scatter import ReduceScatterBucketer
+from colossalai.zero.gemini.memory_tracer import MemStatsCollector
 from ._utils import (
     cast_float_arguments,
......
@@ -2,7 +2,7 @@ import copy
 import torch
-from colossalai.zero.legacy.sharded_model import ShardedModelV2
+from colossalai.legacy.zero.sharded_model import ShardedModelV2
 def col_model_deepcopy(sharded_model: ShardedModelV2, other_model: torch.nn.Module):
......
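For downstream code that has to import these symbols across ColossalAI versions on both sides of this reorganization, a small compatibility shim is one option. This is an illustrative sketch, not part of the commit; it only assumes the two package layouts shown in the hunks above:

```python
# Illustrative compatibility shim: prefer the post-refactor location under
# colossalai.legacy and fall back to the pre-refactor path on older releases.
try:
    from colossalai.legacy.zero.sharded_model import ShardedModelV2
except ImportError:
    from colossalai.zero.legacy.sharded_model import ShardedModelV2
```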