Unverified commit fae6c92e authored by Hongxin Liu, committed by GitHub

Merge branch 'main' into feature/shardformer

parents bd186784 ac178ca5
@@ -11,8 +11,8 @@ import colossalai
 from colossalai.amp import AMP_TYPE
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
-from colossalai.engine.schedule import PipelineSchedule
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.engine.schedule import PipelineSchedule
 from colossalai.logging import get_dist_logger
 from colossalai.nn.optimizer import FusedAdam
 from colossalai.utils import MultiTimer, is_using_pp
...
@@ -2,4 +2,4 @@
 markers =
     dist: tests which are run in a multi-GPU or multi-machine environment (at least 4 GPUs)
     largedist: tests which are run in a multi-GPU or multi-machine environment (at least 8 GPUs)
-addopts = --ignore=tests/test_analyzer --ignore=tests/test_auto_parallel --ignore=tests/test_autochunk --ignore=tests/test_moe --ignore=tests/test_fx
+addopts = --ignore=tests/test_analyzer --ignore=tests/test_auto_parallel --ignore=tests/test_autochunk --ignore=tests/test_moe --ignore=tests/test_fx --ignore=tests/test_legacy
\ No newline at end of file
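For reference, the `dist` and `largedist` markers registered above are what gate the multi-GPU tests elsewhere in the suite. A minimal sketch of how such a marker is applied and selected; the test body here is hypothetical and not part of this commit:

```python
import pytest


# Hypothetical example of a test gated by the `dist` marker declared in pytest.ini.
# Select only these tests with `pytest -m dist`, or skip them with `pytest -m "not dist"`.
@pytest.mark.dist
def test_requires_at_least_four_gpus():
    assert True
```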
@@ -98,7 +98,7 @@ def check_gemini_plugin(subset: str, init_method: str = 'none', early_stop: bool
         ]:
             continue
         err = run_fn(init_method, model_fn, data_gen_fn, output_transform_fn)
+        torch.cuda.empty_cache()
         if err is None:
             passed_models.append(name)
         else:
...
@@ -14,6 +14,7 @@ from colossalai.testing import (
     rerun_if_address_is_in_use,
     spawn,
 )
+from colossalai.zero import LowLevelZeroOptimizer

 # stage 1 and 2 process the optimizer/mode the same way
@@ -50,6 +51,17 @@ def check_low_level_zero_checkpointIO(stage: int, shard: bool, offload: bool):
     booster.load_model(new_model, model_ckpt_path)
     check_state_dict_equal(model.state_dict(), new_model.state_dict(), False)
+    # check master weight
+    assert isinstance(new_optimizer, LowLevelZeroOptimizer)
+    working_param_id_set = set(id(p) for p in new_model.parameters())
+    for p_id, master_param in new_optimizer._param_store.working_to_master_param.items():
+        assert p_id in working_param_id_set
+        working_param = new_optimizer._param_store.master_to_working_param[id(master_param)]
+        padding = new_optimizer._param_store.get_param_padding_size(working_param)
+        padded_param = torch.nn.functional.pad(working_param.data.view(-1), (0, padding))
+        working_shard = padded_param.chunk(dist.get_world_size())[dist.get_rank()]
+        assert torch.equal(working_shard,
+                           master_param.data.view(-1).to(dtype=padded_param.dtype, device=padded_param.device))
     booster.load_optimizer(new_optimizer, optimizer_ckpt_path)
     check_state_dict_equal(optimizer.optim.state_dict(), new_optimizer.optim.state_dict(), False)
...
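The master-weight check added above depends on how low-level ZeRO lays out its shards: each working parameter is flattened, zero-padded so its element count divides evenly by the world size, and each rank keeps one chunk as its master copy. Below is a standalone sketch of that padding and chunking, using hypothetical `world_size` and `rank` values and plain PyTorch rather than ColossalAI internals:

```python
import torch

# Hypothetical values for illustration; in the test they come from torch.distributed.
world_size, rank = 4, 1

working_param = torch.randn(10)                    # flattened working parameter
padding = (-working_param.numel()) % world_size    # pad so numel is divisible by world_size
padded = torch.nn.functional.pad(working_param.view(-1), (0, padding))
master_shard = padded.chunk(world_size)[rank]      # the shard this rank would keep as master weight

assert padded.numel() == master_shard.numel() * world_size
```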
import os
from pathlib import Path

import pytest
import torch
from torchvision import transforms
from torchvision.datasets import CIFAR10

import colossalai
from colossalai.amp import AMP_TYPE
from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc
from colossalai.logging import get_dist_logger
from colossalai.nn import CrossEntropyLoss
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
from colossalai.pipeline.pipelinable import PipelinableContext
from colossalai.testing import rerun_if_address_is_in_use, skip_if_not_enough_gpus, spawn
from colossalai.trainer import Trainer, hooks
from colossalai.utils import get_dataloader

BATCH_SIZE = 4
NUM_EPOCHS = 60
WARMUP_EPOCHS = 5
CONFIG = dict(NUM_MICRO_BATCHES=2,
              parallel=dict(pipeline=2, tensor=dict(size=2, mode='1d')),
              fp16=dict(mode=AMP_TYPE.NAIVE),
              gradient_accumulation=2)


def run_trainer(rank, world_size, port):
    colossalai.launch(config=CONFIG, rank=rank, world_size=world_size, host='localhost', port=port, backend='nccl')

    logger = get_dist_logger()

    # get logger
    logger = get_dist_logger()

    pipelinable = PipelinableContext()
    try:
        from titans.model.vit import vit_tiny_patch4_32
    except ImportError:
        logger.warning('skip the test_cifar_with_data_pipeline_tensor test because titan is not installed')
        logger.warning('please install titan from https://github.com/hpcaitech/Titans')
        return
    with pipelinable:
        model = vit_tiny_patch4_32()
    pipelinable.to_layer_list()
    pipelinable.policy = "uniform"
    model = pipelinable.partition(1, gpc.pipeline_parallel_size, gpc.get_local_rank(ParallelMode.PIPELINE))

    # create dataloaders
    root = Path(os.environ['DATA'])
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4, pad_if_needed=True),
        transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.CIFAR10),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    train_dataset = CIFAR10(root=root, train=True, download=True, transform=transform_train)
    train_dataloader = get_dataloader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE, pin_memory=True)

    # create loss function
    criterion = CrossEntropyLoss(label_smoothing=0.1)

    # create optimizer
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0)

    # create lr scheduler
    lr_scheduler = CosineAnnealingWarmupLR(optimizer=optimizer, total_steps=NUM_EPOCHS, warmup_steps=WARMUP_EPOCHS)

    # initialize
    engine, train_dataloader, *_ = colossalai.initialize(model=model,
                                                         optimizer=optimizer,
                                                         criterion=criterion,
                                                         train_dataloader=train_dataloader)

    logger = get_dist_logger()

    trainer = Trainer(engine=engine, logger=logger)

    hook_list = [
        hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=False),
    ]

    trainer.fit(train_dataloader=train_dataloader,
                epochs=NUM_EPOCHS,
                max_steps=2,
                hooks=hook_list,
                display_progress=True)


@pytest.mark.dist
@skip_if_not_enough_gpus(min_gpus=8)
@rerun_if_address_is_in_use()
def test_hybrid_parallel():
    spawn(run_trainer, 8)


if __name__ == '__main__':
    test_hybrid_parallel()
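As a sanity check on the configuration above: pipeline=2 and tensor size=2 consume 4 ranks per model replica, so the 8-GPU spawn leaves room for 2-way data parallelism. A quick arithmetic sketch of that relationship, in plain Python with no ColossalAI calls:

```python
# Derive the implied data-parallel size from the test's parallel config.
world_size = 8          # spawn(run_trainer, 8)
pipeline_size = 2       # parallel=dict(pipeline=2, ...)
tensor_size = 2         # tensor=dict(size=2, mode='1d')

data_parallel_size = world_size // (pipeline_size * tensor_size)
assert data_parallel_size == 2
```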
@@ -3,9 +3,9 @@ import torch
 import colossalai
 from colossalai.amp.amp_type import AMP_TYPE
+from colossalai.legacy.trainer import Trainer
 from colossalai.logging import get_dist_logger
 from colossalai.testing import parameterize, rerun_if_address_is_in_use, spawn
-from colossalai.trainer import Trainer
 from colossalai.utils import MultiTimer
 from tests.components_to_test.registry import non_distributed_component_funcs
...
@@ -12,9 +12,9 @@ from torchvision.models import resnet18
 import colossalai
 from colossalai.context.parallel_mode import ParallelMode
 from colossalai.core import global_context as gpc
+from colossalai.legacy.trainer import Trainer
 from colossalai.logging import get_dist_logger
 from colossalai.testing import rerun_if_address_is_in_use, spawn
-from colossalai.trainer import Trainer
 from colossalai.utils import MultiTimer, get_dataloader

 BATCH_SIZE = 4
...
@@ -5,7 +5,7 @@ import torch.nn as nn
 import colossalai
 from colossalai.context.moe_context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn.layer.moe import Experts, MoeLayer, Top1Router, UniformNoiseGenerator
 from colossalai.testing import assert_equal_in_group, rerun_if_address_is_in_use, spawn
 from colossalai.utils import get_current_device
...
@@ -3,7 +3,7 @@ import torch
 import colossalai
 from colossalai.context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
 from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use, spawn
 from colossalai.zero.legacy.init_ctx import ZeroInitContext
...
@@ -4,7 +4,7 @@ import torch
 import colossalai
 from colossalai.amp import convert_to_apex_amp
 from colossalai.context import MOE_CONTEXT
-from colossalai.engine.gradient_handler import MoeGradientHandler
+from colossalai.legacy.engine.gradient_handler import MoeGradientHandler
 from colossalai.nn import MoeLoss
 from colossalai.nn.optimizer import CPUAdam
 from colossalai.testing import assert_equal_in_group, parameterize, rerun_if_address_is_in_use, spawn
...
 import os
-from typing import Callable, List, Optional, Type, Union
 import time
 import pytest
 import torch
 import torch.nn as nn
+from rpc_test_utils import parse_args, rpc_run
 from titans.dataloader.cifar10 import build_cifar
 from torchvision.models import resnet50
-from torchvision.models.resnet import BasicBlock, Bottleneck, conv1x1
 from tqdm import tqdm
-from rpc_test_utils import rpc_run, parse_args
-import colossalai
-import colossalai.nn as col_nn
-from colossalai.logging import disable_existing_loggers, get_dist_logger
-from colossalai.trainer import Trainer, hooks
-from colossalai.utils import MultiTimer, get_dataloader
-from colossalai.context import ParallelMode
-from colossalai.pipeline.pipelinable import PipelinableContext, PipelinableModel
-from colossalai.pipeline.rpc import OneFOneBPipelineEngine, ChimeraPipelineEngine
-from colossalai.pipeline.pipeline_process_group import ppg
+from colossalai.pipeline.pipelinable import PipelinableContext
+from colossalai.pipeline.rpc import OneFOneBPipelineEngine

 def flatten(x):
...