Unverified commit b5f9e37c authored by Hongxin Liu, committed by GitHub

[legacy] clean up legacy code (#4743)

* [legacy] remove outdated codes of pipeline (#4692)

* [legacy] remove cli of benchmark and update optim (#4690)

* [legacy] remove cli of benchmark and update optim

* [doc] fix cli doc test

* [legacy] fix engine clip grad norm

* [legacy] remove outdated colo tensor (#4694)

* [legacy] remove outdated colo tensor

* [test] fix test import

* [legacy] move outdated zero to legacy (#4696)

* [legacy] clean up utils (#4700)

* [legacy] clean up utils

* [example] update examples

* [legacy] clean up amp

* [legacy] fix amp module

* [legacy] clean up gpc (#4742)

* [legacy] clean up context

* [legacy] clean core, constants and global vars

* [legacy] refactor initialize

* [example] fix examples ci

* [example] fix examples ci

* [legacy] fix tests

* [example] fix gpt example

* [example] fix examples ci

* [devops] fix ci installation

* [example] fix examples ci
parent 32e7f994
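
Most of the diff below is a mechanical import migration: modules that previously lived at the package root (colossalai.core, colossalai.context, colossalai.amp, colossalai.pipeline, parts of colossalai.utils) now live under colossalai.legacy. A minimal before/after sketch of the pattern, for illustration only (the helper function is hypothetical and not part of the repository):

# Old import paths removed by this commit:
#   from colossalai.amp import AMP_TYPE
#   from colossalai.context import ParallelMode
#   from colossalai.core import global_context as gpc

# New import paths after this commit:
from colossalai.legacy.amp import AMP_TYPE
from colossalai.legacy.context import ParallelMode
from colossalai.legacy.core import global_context as gpc


def data_parallel_world_size() -> int:
    # Hypothetical helper: query the data-parallel world size through the
    # legacy global context, the same call pattern used in the diff below.
    return gpc.get_world_size(ParallelMode.DATA)
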
 #!/bin/bash
 set -xue
-pip install -r requirements.txt
+echo "this test is outdated"
+# pip install -r requirements.txt
-BS=4
-MEMCAP=0
-GPUNUM=4
-MODLE="facebook/opt-125m"
-torchrun \
---nproc_per_node ${GPUNUM} \
---master_port 19198 \
-run_clm.py \
--s \
---output_dir $PWD \
---mem_cap ${MEMCAP} \
---model_name_or_path ${MODLE} \
---per_device_train_batch_size ${BS} \
---num_train_epochs 1
+# BS=4
+# MEMCAP=0
+# GPUNUM=4
+# MODLE="facebook/opt-125m"
+# torchrun \
+# --nproc_per_node ${GPUNUM} \
+# --master_port 19198 \
+# run_clm.py \
+# -s \
+# --output_dir $PWD \
+# --mem_cap ${MEMCAP} \
+# --model_name_or_path ${MODLE} \
+# --per_device_train_batch_size ${BS} \
+# --num_train_epochs 1
-from colossalai.amp import AMP_TYPE
+from colossalai.legacy.amp import AMP_TYPE
 # hyper-parameters
 TRAIN_ITERS = 10
...
-from colossalai.context.parallel_context import ParallelContext
-from colossalai.core import global_context as gpc
+import torch
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.context.parallel_context import ParallelContext
+from colossalai.legacy.core import global_context as gpc
 from colossalai.logging import get_dist_logger
-from colossalai.context import ParallelMode
-from .datasets.data_samplers import build_pretraining_data_loader
 from .datasets.builder import build_train_valid_test_datasets
-import torch
+from .datasets.data_samplers import build_pretraining_data_loader
 def cyclic_iter(iter):

@@ -18,8 +20,7 @@ def build_train_valid_test_data_iterators(train_iters,
 eval_interval,
 eval_iters,
 dataloader_type='single',
-**kwargs
-):
+**kwargs):
 (train_dataloader, valid_dataloader, test_dataloader) = (None, None, None)
 logger = get_dist_logger()

@@ -42,9 +43,7 @@ def build_train_valid_test_data_iterators(train_iters,
 train_samples = train_iters * global_batch_size
 eval_iters_ = (train_iters // eval_interval + 1) * eval_iters
 test_iters = eval_iters
-train_val_test_num_samples = [train_samples,
-eval_iters_ * global_batch_size,
-test_iters * global_batch_size]
+train_val_test_num_samples = [train_samples, eval_iters_ * global_batch_size, test_iters * global_batch_size]
 logger.info(' > datasets target sizes (minimum size):')
 logger.info(' train: {}'.format(train_val_test_num_samples[0]), ranks=[0])
 logger.info(' validation: {}'.format(train_val_test_num_samples[1]), ranks=[0])

@@ -56,19 +55,20 @@ def build_train_valid_test_data_iterators(train_iters,
 # Build dataloaders.
 dp_size = gpc.get_world_size(ParallelMode.DATA)
-train_dataloader = build_pretraining_data_loader(
-train_ds, consumed_samples=0, micro_batch_size=global_batch_size//dp_size)
-valid_dataloader = build_pretraining_data_loader(
-valid_ds, consumed_samples=0, micro_batch_size=global_batch_size//dp_size)
-test_dataloader = build_pretraining_data_loader(test_ds, 0, micro_batch_size=global_batch_size//dp_size)
+train_dataloader = build_pretraining_data_loader(train_ds,
+consumed_samples=0,
+micro_batch_size=global_batch_size // dp_size)
+valid_dataloader = build_pretraining_data_loader(valid_ds,
+consumed_samples=0,
+micro_batch_size=global_batch_size // dp_size)
+test_dataloader = build_pretraining_data_loader(test_ds, 0, micro_batch_size=global_batch_size // dp_size)
 # Flags to know if we need to do training/validation/testing.
 do_train = train_dataloader is not None and train_iters > 0
 do_valid = valid_dataloader is not None and eval_iters > 0
 do_test = test_dataloader is not None and eval_iters > 0
 # Need to broadcast num_tokens and num_type_tokens.
-flags = torch.cuda.LongTensor(
-[int(do_train), int(do_valid), int(do_test)])
+flags = torch.cuda.LongTensor([int(do_train), int(do_valid), int(do_test)])
 else:
 flags = torch.cuda.LongTensor([0, 0, 0])
...
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
 import torch
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 _MAX_DATA_DIM = 5

@@ -22,7 +23,8 @@ def _build_key_size_numel_dictionaries(keys, data):
 # Move to GPU and broadcast.
 sizes_cuda = torch.cuda.LongTensor(sizes)
-torch.distributed.broadcast(sizes_cuda, gpc.get_ranks_in_group(ParallelMode.TENSOR)[0],
+torch.distributed.broadcast(sizes_cuda,
+gpc.get_ranks_in_group(ParallelMode.TENSOR)[0],
 group=gpc.get_group(ParallelMode.TENSOR))
 # Move back to cpu and unpack.

@@ -60,19 +62,15 @@ def broadcast_data(keys, data, datatype):
 """
 # Build (key, size) and (key, number of elements) dictionaries along
 # with the total number of elements on all ranks.
-key_size, key_numel, total_numel = _build_key_size_numel_dictionaries(keys,
-data)
+key_size, key_numel, total_numel = _build_key_size_numel_dictionaries(keys, data)
 # Pack on rank zero.
 if not gpc.is_initialized(ParallelMode.TENSOR) or gpc.get_local_rank(ParallelMode.TENSOR) == 0:
 # Check that all keys have the same data type.
 # Flatten the data associated with the keys
-flatten_data = torch.cat(
-[data[key].contiguous().view(-1) for key in keys], dim=0).cuda()
+flatten_data = torch.cat([data[key].contiguous().view(-1) for key in keys], dim=0).cuda()
 else:
-flatten_data = torch.empty(total_numel,
-device=torch.cuda.current_device(),
-dtype=datatype)
+flatten_data = torch.empty(total_numel, device=torch.cuda.current_device(), dtype=datatype)
 # Broadcast
 torch.distributed.broadcast(flatten_data,

@@ -139,7 +137,7 @@ def get_batch_for_sequence_parallel(data_iterator):
 seq_length = data_b['text'].size(1)
 sub_seq_length = seq_length // local_world_size
 sub_seq_start = local_rank * sub_seq_length
-sub_seq_end = (local_rank+1) * sub_seq_length
+sub_seq_end = (local_rank + 1) * sub_seq_length
 #
 # # Unpack.
 tokens = data_b['text'][:, sub_seq_start:sub_seq_end].long()

@@ -156,10 +154,9 @@ class SequenceParallelDataIterator:
 def __init__(self, data_iter):
 self.data_iter = data_iter
 def __iter__(self):
 return self.data_iter
 def __next__(self):
 return get_batch_for_sequence_parallel(self.data_iter)
\ No newline at end of file
@@ -21,8 +21,8 @@ import numpy as np
 import torch
 from torch.utils.data import Dataset
-from colossalai.context import ParallelMode
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.logging import get_dist_logger
 from ..tokenizer import get_tokenizer
...
@@ -14,10 +14,12 @@
 # limitations under the License.
 """Dataloaders."""
-import torch
 import random
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
+import torch
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 def build_pretraining_data_loader(dataset, consumed_samples, micro_batch_size, dataloader_type='single', num_workers=0):
...
@@ -12,13 +12,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Megatron tokenizers."""
-from abc import ABC
-from abc import abstractmethod
+from abc import ABC, abstractmethod
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from .bert_tokenization import FullTokenizer as FullBertTokenizer

@@ -26,18 +25,13 @@ from .bert_tokenization import FullTokenizer as FullBertTokenizer
 def build_tokenizer(vocab_file, tokenizer_type, vocab_extra_ids=0):
 """Initialize tokenizer."""
 if not gpc.is_initialized(ParallelMode.GLOBAL) or gpc.get_global_rank() == 0:
-print('> building {} tokenizer ...'.format(tokenizer_type),
-flush=True)
+print('> building {} tokenizer ...'.format(tokenizer_type), flush=True)
 # Select and instantiate the tokenizer.
 if tokenizer_type == 'BertWordPieceLowerCase':
-tokenizer = _BertWordPieceTokenizer(vocab_file=vocab_file,
-lower_case=True,
-vocab_extra_ids=vocab_extra_ids)
+tokenizer = _BertWordPieceTokenizer(vocab_file=vocab_file, lower_case=True, vocab_extra_ids=vocab_extra_ids)
 elif tokenizer_type == 'BertWordPieceCase':
-tokenizer = _BertWordPieceTokenizer(vocab_file=vocab_file,
-lower_case=False,
-vocab_extra_ids=vocab_extra_ids)
+tokenizer = _BertWordPieceTokenizer(vocab_file=vocab_file, lower_case=False, vocab_extra_ids=vocab_extra_ids)
 else:
 raise NotImplementedError('{} tokenizer is not '
 'implemented.'.format(tokenizer_type))

@@ -62,8 +56,8 @@ def _vocab_size_with_padding(orig_vocab_size, make_vocab_size_divisible_by=128):
 after += 1
 if not gpc.is_initialized(ParallelMode.GLOBAL) or gpc.get_global_rank() == 0:
 print(' > padded vocab (size: {}) with {} dummy tokens '
-'(new size: {})'.format(
-orig_vocab_size, after - orig_vocab_size, after), flush=True)
+'(new size: {})'.format(orig_vocab_size, after - orig_vocab_size, after),
+flush=True)
 return after

@@ -142,8 +136,7 @@ class _BertWordPieceTokenizer(AbstractTokenizer):
 self._additional_special_tokens = []
 # (dsachan) Add BOS and EOS tokens
-SPECIAL_TOKENS = {'eos_token': '[EOS]',
-'bos_token': '[BOS]'}
+SPECIAL_TOKENS = {'eos_token': '[EOS]', 'bos_token': '[BOS]'}
 self._bos_token = '[BOS]'
 self.add_token(self._bos_token)
 self._bos_token_id = self.vocab.get(self._bos_token)

@@ -155,8 +148,7 @@ class _BertWordPieceTokenizer(AbstractTokenizer):
 # (dsachan) Add additional special tokens
 # These can be used as sentinel tokens in T5 model inputs
 additional_special_tokens = []
-additional_special_tokens.extend(
-["<extra_id_{}>".format(i) for i in range(vocab_extra_ids)])
+additional_special_tokens.extend(["<extra_id_{}>".format(i) for i in range(vocab_extra_ids)])
 self.add_additional_special_tokens(additional_special_tokens)
 def add_token(self, token):
...
 import torch
-import torch.distributed as dist
 import torch.nn as nn
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
-from colossalai.logging import get_dist_logger
 import torch.nn.functional as F
+import torch.distributed as dist
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
+from colossalai.logging import get_dist_logger
 from .cross_entropy import vocab_cross_entropy
 class BertLoss(nn.Module):
-def forward(self,
-lm_loss,
-sop_logits,
-loss_mask,
-sentence_order):
+def forward(self, lm_loss, sop_logits, loss_mask, sentence_order):
 lm_loss_ = lm_loss.float()
 loss_mask = loss_mask.float()
 loss_mask_sum = loss_mask.sum()
-lm_loss = torch.sum(
-lm_loss_.view(-1) * loss_mask.reshape(-1))
+lm_loss = torch.sum(lm_loss_.view(-1) * loss_mask.reshape(-1))
 lm_loss /= loss_mask_sum
-torch.distributed.all_reduce(
-lm_loss,
-group=gpc.get_group(ParallelMode.SEQUENCE)
-)
+torch.distributed.all_reduce(lm_loss, group=gpc.get_group(ParallelMode.SEQUENCE))
 if sop_logits is not None:
-sop_loss = F.cross_entropy(sop_logits.view(-1, 2).float(),
-sentence_order.view(-1),
-ignore_index=-1)
+sop_loss = F.cross_entropy(sop_logits.view(-1, 2).float(), sentence_order.view(-1), ignore_index=-1)
 sop_loss = sop_loss.float()
 loss = lm_loss + sop_loss * gpc.get_world_size(ParallelMode.SEQUENCE)
 else:
...
-from colossalai.context.parallel_mode import ParallelMode
 import torch
 from torch.cuda.amp import custom_bwd, custom_fwd
+from colossalai.legacy.context.parallel_mode import ParallelMode
 class _VocabCrossEntropy(torch.autograd.Function):

@@ -24,8 +25,7 @@ class _VocabCrossEntropy(torch.autograd.Function):
 # [*, partition-vocab-size] and target to a 1-D tensor of size [*].
 logits_2d = vocab_parallel_logits.view(-1, vocab_parallel_logits.size(-1))
 masked_target_1d = masked_target.view(-1)
-arange_1d = torch.arange(start=0, end=logits_2d.size()[0],
-device=logits_2d.device)
+arange_1d = torch.arange(start=0, end=logits_2d.size()[0], device=logits_2d.device)
 predicted_logits_1d = logits_2d[arange_1d, masked_target_1d]
 predicted_logits_1d = predicted_logits_1d.clone().contiguous()
 predicted_logits = predicted_logits_1d.view_as(target)

@@ -58,10 +58,8 @@ class _VocabCrossEntropy(torch.autograd.Function):
 grad_2d = grad_input.view(-1, partition_vocab_size)
 # Add the gradient from matching classes.
-arange_1d = torch.arange(start=0, end=grad_2d.size()[0],
-device=grad_2d.device)
-grad_2d[arange_1d, masked_target_1d] -= (
-1.0 - target_mask.view(-1).float())
+arange_1d = torch.arange(start=0, end=grad_2d.size()[0], device=grad_2d.device)
+grad_2d[arange_1d, masked_target_1d] -= (1.0 - target_mask.view(-1).float())
 # Finally elementwise multiplication with the output gradients.
 grad_input.mul_(grad_output.unsqueeze(dim=-1))
...
@@ -3,13 +3,13 @@ import inspect
 import torch
 import torch.nn as nn
-from colossalai.context import ParallelMode
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.nn.layer.wrapper import PipelineSharedModuleWrapper
+from colossalai.legacy.pipeline.utils import partition_uniform
 from colossalai.logging import get_dist_logger
-from colossalai.pipeline.utils import partition_uniform
 from .layers import BertDualHead, BertLayer, Embedding, PreProcessor, VocabEmbedding
 from .layers.init_method import init_normal, output_init_normal
...
-import colossalai
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from .pooler import Pooler
-from .linear import Linear
-from .embedding import VocabEmbedding
-from colossalai.core import global_context as gpc
-from colossalai.context import ParallelMode
-from colossalai.kernel import LayerNorm
 from loss_func.cross_entropy import vocab_cross_entropy
+import colossalai
+from colossalai.kernel import LayerNorm
+from colossalai.legacy.context import ParallelMode
+from colossalai.legacy.core import global_context as gpc
+from .embedding import VocabEmbedding
+from .linear import Linear
+from .pooler import Pooler
 class BertLMHead(nn.Module):
 """Masked LM head for Bert

@@ -19,10 +21,11 @@ class BertLMHead(nn.Module):
 layernorm_epsilon: tolerance for layer norm divisions
 """
-def __init__(self,
-vocab_size,
-hidden_size,
-):
+def __init__(
+self,
+vocab_size,
+hidden_size,
+):
 super(BertLMHead, self).__init__()
 self.bias = torch.nn.Parameter(torch.zeros(vocab_size))
...
-from colossalai.context.parallel_mode import ParallelMode
 import torch
 import torch.nn as nn
-from colossalai.core import global_context as gpc
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 class PreProcessor(nn.Module):

@@ -14,8 +15,8 @@ class PreProcessor(nn.Module):
 # Create position ids
 seq_length = token_ids.size(1)
 local_rank = gpc.get_local_rank(ParallelMode.SEQUENCE)
-position_ids = torch.arange(seq_length*local_rank,
-seq_length * (local_rank+1),
+position_ids = torch.arange(seq_length * local_rank,
+seq_length * (local_rank + 1),
 dtype=torch.long,
 device=token_ids.device)
 position_ids = position_ids.unsqueeze(0).expand_as(token_ids)
...
 #!/bin/bash
 set -euxo pipefail
-pip install -r requirements.txt
+echo "this test is outdated"
+# pip install -r requirements.txt
 # run test
-colossalai run --nproc_per_node 4 train.py
+# colossalai run --nproc_per_node 4 train.py
@@ -8,14 +8,15 @@ from lr_scheduler import AnnealingLR
 from model.bert import BertForPretrain, build_pipeline_bert
 import colossalai
-from colossalai.amp import AMP_TYPE
-from colossalai.context.parallel_mode import ParallelMode
-from colossalai.core import global_context as gpc
 from colossalai.kernel import LayerNorm
+from colossalai.legacy.amp import AMP_TYPE
+from colossalai.legacy.context.parallel_mode import ParallelMode
+from colossalai.legacy.core import global_context as gpc
 from colossalai.legacy.engine.schedule import PipelineSchedule
+from colossalai.legacy.utils import is_using_pp
 from colossalai.logging import get_dist_logger
 from colossalai.nn.optimizer import FusedAdam
-from colossalai.utils import MultiTimer, is_using_pp
+from colossalai.utils import MultiTimer
 def process_batch_data(batch_data):
...
-from torchvision.models import resnet18
-from .registry import non_distributed_component_funcs
-from pathlib import Path
 import os
+from pathlib import Path
 import torch
-from torchvision.transforms import transforms
 from torchvision.datasets import CIFAR10
-from colossalai.utils import get_dataloader
+from torchvision.models import resnet18
+from torchvision.transforms import transforms
+from colossalai.legacy.utils import get_dataloader
+from .registry import non_distributed_component_funcs
 def get_cifar10_dataloader(train):
...
@@ -6,12 +6,12 @@ import torch.fx
 import torchvision.models as tm
 import colossalai
-from colossalai.core import global_context as gpc
 from colossalai.fx import ColoGraphModule, ColoTracer
 from colossalai.fx._compatibility import is_compatible_with_meta
 # from colossalai.fx.passes.algorithms import solver_rotor
 # from colossalai.fx.passes.algorithms.operation import Sequence
 from colossalai.fx.passes.meta_info_prop import MetaInfoProp
+from colossalai.legacy.core import global_context as gpc
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 if is_compatible_with_meta():
...
@@ -8,12 +8,12 @@ import torchvision.models as tm
 from torch.fx import GraphModule
 import colossalai
-from colossalai.core import global_context as gpc
 from colossalai.fx import ColoTracer
 from colossalai.fx._compatibility import is_compatible_with_meta
 from colossalai.fx.graph_module import ColoGraphModule
 # from colossalai.fx.passes.algorithms import chen_greedy, solver_rotor
 from colossalai.fx.passes.meta_info_prop import MetaInfoProp
+from colossalai.legacy.core import global_context as gpc
 from colossalai.testing import rerun_if_address_is_in_use, spawn
 if is_compatible_with_meta():
...
@@ -13,10 +13,9 @@ from colossalai.device.device_mesh import DeviceMesh
 from colossalai.initialize import launch
 from colossalai.logging import disable_existing_loggers
 from colossalai.nn.optimizer import HybridAdam
-from colossalai.tensor.process_group import ProcessGroup
 from colossalai.testing import assert_close, rerun_if_address_is_in_use, run_on_environment_flag, spawn
 from colossalai.utils import get_current_device
-from colossalai.zero import post_process_colo_init_ctx, zero_model_wrapper, zero_optim_wrapper
+from colossalai.zero import zero_model_wrapper, zero_optim_wrapper
 class MLP(torch.nn.Module):

@@ -70,14 +69,12 @@ def check_auto_parallel_with_gemini(rank, world_size, port):
 print(strategy)
 print('=' * msg_length)
-dp_process_group = ProcessGroup(rank=rank, ranks=[0, 1, 2, 3], tp_degree=2, dp_degree=2)
 gemini_config = dict(strict_ddp_mode=False,
 device=get_current_device(),
 placement_policy='cpu',
 pin_memory=True,
 search_range_m=128)
-post_process_colo_init_ctx(gm, device=get_current_device(), default_pg=dp_process_group)
 gm = zero_model_wrapper(gm, zero_stage=3, gemini_config=gemini_config)
 optimizer = HybridAdam(gm.parameters(), betas=(0, 0))
 optimizer = zero_optim_wrapper(gm, optimizer, initial_scale=1)
...
@@ -6,9 +6,9 @@ import torch.fx
 import colossalai
 from colossalai.autochunk.autochunk_codegen import AUTOCHUNK_AVAILABLE
 from colossalai.autochunk.utils import flat_list
-from colossalai.core import global_context as gpc
 from colossalai.fx.graph_module import ColoGraphModule
 from colossalai.fx.passes.meta_info_prop import MetaInfoProp
+from colossalai.legacy.core import global_context as gpc
 from colossalai.testing import free_port
 if AUTOCHUNK_AVAILABLE:
...
@@ -5,9 +5,9 @@ import torch.fx
 import colossalai
 from colossalai.autochunk.autochunk_codegen import AUTOCHUNK_AVAILABLE
-from colossalai.core import global_context as gpc
 from colossalai.fx.graph_module import ColoGraphModule
 from colossalai.fx.passes.meta_info_prop import MetaInfoProp
+from colossalai.legacy.core import global_context as gpc
 if AUTOCHUNK_AVAILABLE:
 from colossalai.autochunk.autochunk_codegen import AutoChunkCodeGen
...