Commit 404ecbdc authored by zbian

Migrated project

parent 2ebaefc5
# modified from https://github.com/lucidrains/mlp-mixer-pytorch/blob/main/mlp_mixer_pytorch/mlp_mixer_pytorch.py
from functools import partial
from colossalai.context import ParallelMode
from colossalai.registry import MODELS
from torch import nn
from colossalai import nn as col_nn
from colossalai.nn.layer.parallel_3d._utils import get_depth_from_env
from einops.layers.torch import Rearrange, Reduce
__all__ = [
'MLPMixer',
]
class PreNormResidual(nn.Module):
def __init__(self, dim, fn, depth_3d):
super().__init__()
self.fn = fn
self.norm = col_nn.LayerNorm3D(
dim, depth_3d, ParallelMode.PARALLEL_3D_INPUT, ParallelMode.PARALLEL_3D_WEIGHT)
def forward(self, x):
return self.fn(self.norm(x)) + x
def FeedForward(dim, depth_3d, expansion_factor=4, dropout=0., dense=None):
if dense is None:
dense = partial(col_nn.Linear3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
return nn.Sequential(
dense(dim, dim * expansion_factor),
nn.GELU(),
nn.Dropout(dropout),
dense(dim * expansion_factor, dim),
nn.Dropout(dropout)
)
@MODELS.register_module
def MLPMixer(image_size, channels, patch_size, dim, depth, num_classes, expansion_factor=4, dropout=0.):
    assert (image_size % patch_size) == 0, 'image size must be divisible by patch size'
num_patches = (image_size // patch_size) ** 2
depth_3d = get_depth_from_env()
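    # depth of the 3-D tensor-parallel process mesh (read from an environment variable set up during initialization)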
linear = partial(col_nn.Linear3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
norm_layer = partial(col_nn.LayerNorm3D, depth=depth_3d, input_parallel_mode=ParallelMode.PARALLEL_3D_INPUT,
weight_parallel_mode=ParallelMode.PARALLEL_3D_WEIGHT)
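    # token-mixing MLPs use a 1x1 Conv1d applied across patches; channel-mixing MLPs use the 3-D parallel Linear3D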
chan_first, chan_last = partial(nn.Conv1d, kernel_size=1), linear
return nn.Sequential(
Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)',
p1=patch_size, p2=patch_size),
linear((patch_size ** 2) * channels, dim),
        *[nn.Sequential(
            PreNormResidual(dim, FeedForward(
                num_patches, depth_3d, expansion_factor, dropout, chan_first), depth_3d),
            PreNormResidual(dim, FeedForward(
                dim, depth_3d, expansion_factor, dropout, chan_last), depth_3d)
        ) for _ in range(depth)],
norm_layer(dim),
Reduce('b n c -> b c', 'mean'),
linear(dim, num_classes)
)
from .parallel_2d import *
from .parallel_3d import *
from .vit import *
from colossalai.context import ParallelMode, seed
from colossalai import nn as clsl_nn
from colossalai.registry import MODELS
from torch import nn
import torch
__all__ = [
'VisionTransformer2D',
'vit_tiny_2d_patch4_32',
'vit_tiny_2d_patch16_224',
'vit_tiny_2d_patch16_384',
'vit_small_2d_patch16_224',
'vit_small_2d_patch16_384',
'vit_small_2d_patch32_224',
'vit_small_2d_patch32_384',
'vit_base_2d_patch16_224',
'vit_base_2d_patch16_384',
'vit_base_2d_patch32_224',
'vit_base_2d_patch32_384',
'vit_large_2d_patch16_224',
'vit_large_2d_patch16_384',
'vit_large_2d_patch32_224',
'vit_large_2d_patch32_384',
]
class ViTBlock2D(nn.Module):
def __init__(self,
dim: int,
num_heads: int,
mlp_ratio: int = 4,
drop: float = 0.,
attn_drop: float = 0.,
drop_path: float = 0.,
act_layer: str = 'gelu'):
super().__init__()
self.norm1 = clsl_nn.LayerNorm2D(dim, eps=1e-6)
self.attn = clsl_nn.ViTSelfAttention2D(dim, num_heads, attn_drop, drop)
self.drop_path = clsl_nn.VanillaViTDropPath(drop_path) if drop_path > 0. \
else nn.Identity()
self.norm2 = clsl_nn.LayerNorm2D(dim, eps=1e-6)
self.mlp = clsl_nn.ViTMLP2D(dim, mlp_ratio, act_layer, drop)
def forward(self, x):
y = self.attn(self.norm1(x))
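        # run drop path under the TENSOR-mode RNG state; presumably this keeps the dropout masks consistent within the tensor-parallel group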
with seed(ParallelMode.TENSOR):
x = x + self.drop_path(y)
y = self.mlp(self.norm2(x))
with seed(ParallelMode.TENSOR):
x = x + self.drop_path(y)
return x
@MODELS.register_module
class VisionTransformer2D(nn.Module):
def __init__(self,
img_size: int = 224,
patch_size: int = 16,
in_chans: int = 3,
num_classes: int = 1000,
embed_dim: int = 768,
depth: int = 12,
num_heads: int = 12,
mlp_ratio: int = 4,
drop_rate: float = 0.,
attn_drop_rate: float = 0.,
drop_path_rate: float = 0.,
act_layer: str = 'gelu'):
super().__init__()
self.num_classes = num_classes
self.num_features = self.embed_dim = embed_dim
self.patch_embed = clsl_nn.ViTPatchEmbedding2D(
img_size, patch_size, embed_dim, in_chans
)
self.splitter = clsl_nn.ViTInputSplitter2D()
self.token_fuser = clsl_nn.ViTTokenFuser2D(
img_size, patch_size, embed_dim, drop_rate
)
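        # stochastic depth decay rule: drop path rate grows linearly from 0 to drop_path_rate across blocks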
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
self.blocks = nn.Sequential(*[
ViTBlock2D(embed_dim, num_heads, mlp_ratio, drop_rate,
attn_drop_rate, dpr[i], act_layer)
for i in range(depth)
])
self.norm = clsl_nn.LayerNorm2D(embed_dim, eps=1e-6)
self.head = clsl_nn.ViTHead2D(self.num_features, num_classes) if num_classes > 0 \
else nn.Identity()
self.init_weights()
def init_weights(self):
pass
def forward(self, x):
x = self.patch_embed(x)
x = self.splitter(x)
x = self.token_fuser(x)
x = self.blocks(x)
x = self.norm(x)
x = self.head(x)
return x
def _create_vit_model(**model_kwargs):
model = VisionTransformer2D(**model_kwargs)
return model
@MODELS.register_module
def vit_tiny_2d_patch4_32(**kwargs):
model_kwargs = dict(img_size=32, patch_size=4, embed_dim=512,
depth=6, num_heads=8, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_tiny_2d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=192,
depth=12, num_heads=3, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_tiny_2d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16, embed_dim=192,
depth=12, num_heads=3, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_2d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=384,
depth=12, num_heads=6, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_2d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16, embed_dim=384,
depth=12, num_heads=6, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_2d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=384,
depth=12, num_heads=6, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_2d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32, embed_dim=384,
depth=12, num_heads=6, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_2d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=768,
depth=12, num_heads=12, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_2d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16, embed_dim=768,
depth=12, num_heads=12, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_2d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=768,
depth=12, num_heads=12, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_2d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32, embed_dim=768,
depth=12, num_heads=12, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_2d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=1024,
depth=24, num_heads=16, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_2d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16, embed_dim=1024,
depth=24, num_heads=16, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_2d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=1024,
depth=24, num_heads=16, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_2d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32, embed_dim=1024,
depth=24, num_heads=16, **kwargs)
return _create_vit_model(**model_kwargs)
import torch
from torch import nn
from colossalai import nn as col_nn
from colossalai.context import ParallelMode
from colossalai.registry import MODELS
__all__ = [
'VisionTransformer3D',
'vit_tiny_3d_patch4_32',
'vit_tiny_3d_patch16_224',
'vit_tiny_3d_patch16_384',
'vit_small_3d_patch16_224',
'vit_small_3d_patch16_384',
'vit_small_3d_patch32_224',
'vit_small_3d_patch32_384',
'vit_base_3d_patch16_224',
'vit_base_3d_patch16_384',
'vit_base_3d_patch32_224',
'vit_base_3d_patch32_384',
'vit_large_3d_patch16_224',
'vit_large_3d_patch16_384',
'vit_large_3d_patch32_224',
'vit_large_3d_patch32_384',
]
class ViTBlock3D(nn.Module):
def __init__(self,
dim: int,
num_heads: int,
hidden_dim: int,
drop: float = 0.,
attn_drop: float = 0.,
drop_path: float = 0.):
super().__init__()
self.norm1 = col_nn.LayerNorm3D(
dim, ParallelMode.PARALLEL_3D_INPUT, ParallelMode.PARALLEL_3D_WEIGHT, eps=1e-6)
self.attn = col_nn.ViTSelfAttention3D(dim, num_heads, attn_drop, drop)
self.drop_path = col_nn.VanillaViTDropPath(
drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = col_nn.LayerNorm3D(dim, ParallelMode.PARALLEL_3D_INPUT, ParallelMode.PARALLEL_3D_WEIGHT, eps=1e-6)
self.mlp = col_nn.ViTMLP3D(hidden_dim, 1, drop, 'gelu')
def forward(self, x):
x = x + self.drop_path(self.attn(self.norm1(x)))
x = x + self.drop_path(self.mlp(self.norm2(x)))
return x
@MODELS.register_module
class VisionTransformer3D(nn.Module):
def __init__(self,
img_size: int = 224,
patch_size: int = 16,
in_chans: int = 3,
num_classes: int = 1000,
depth: int = 12,
num_heads: int = 12,
embed_dim: int = 768,
hidden_dim: int = 3072,
drop_rate: float = 0.,
attn_drop_rate: float = 0.,
drop_path_rate: float = 0.):
super().__init__()
self.num_classes = num_classes
self.num_features = self.embed_dim = embed_dim
self.patch_embed = col_nn.ViTPatchEmbedding3D(
img_size,
patch_size,
in_chans,
embed_dim,
drop_rate,
)
# stochastic depth decay rule
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
self.blocks = nn.Sequential(*[
ViTBlock3D(embed_dim, num_heads, hidden_dim,
drop_rate, attn_drop_rate, dpr[i])
for i in range(depth)
])
self.norm = col_nn.LayerNorm3D(embed_dim, ParallelMode.PARALLEL_3D_INPUT,
ParallelMode.PARALLEL_3D_WEIGHT)
self.head = col_nn.ViTHead3D(hidden_dim, num_classes)
self.init_weights()
def init_weights(self):
pass
def forward(self, x):
x = self.patch_embed(x)
x = self.blocks(x)
x = self.norm(x)
x = self.head(x)
return x
def _create_vit_model(**model_kwargs):
model = VisionTransformer3D(**model_kwargs)
return model
@MODELS.register_module
def vit_tiny_3d_patch4_32(**kwargs):
model_kwargs = dict(img_size=32, patch_size=4, embed_dim=512,
depth=6, num_heads=8, hidden_dim=512, num_classes=10, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_tiny_3d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=192,
depth=12, num_heads=3, hidden_dim=768, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_tiny_3d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16,
embed_dim=192, depth=12, num_heads=3, hidden_dim=768, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_3d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=384,
depth=12, num_heads=6, hidden_dim=1536, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_3d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16,
embed_dim=384, depth=12, num_heads=6, hidden_dim=1536, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_3d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=384,
depth=12, num_heads=6, hidden_dim=1536, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_small_3d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32,
embed_dim=384, depth=12, num_heads=6, hidden_dim=1536, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_3d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=768,
depth=12, num_heads=12, hidden_dim=3072, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_3d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16,
embed_dim=768, depth=12, num_heads=12, hidden_dim=3072, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_3d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=768,
depth=12, num_heads=12, hidden_dim=3072, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_base_3d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32,
embed_dim=768, depth=12, num_heads=12, hidden_dim=3072, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_3d_patch16_224(**kwargs):
model_kwargs = dict(patch_size=16, embed_dim=1024,
depth=24, num_heads=16, hidden_dim=4096, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_3d_patch16_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=16,
embed_dim=1024, depth=24, num_heads=16, hidden_dim=4096, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_3d_patch32_224(**kwargs):
model_kwargs = dict(patch_size=32, embed_dim=1024,
depth=24, num_heads=16, hidden_dim=4096, **kwargs)
return _create_vit_model(**model_kwargs)
@MODELS.register_module
def vit_large_3d_patch32_384(**kwargs):
model_kwargs = dict(img_size=384, patch_size=32,
embed_dim=1024, depth=24, num_heads=16, hidden_dim=4096, **kwargs)
return _create_vit_model(**model_kwargs)
[pytest]
markers =
cpu: tests which can run on CPU
    gpu: tests which require a single GPU
dist: tests which are run in a multi-GPU or multi-machine environment
experiment: tests for experimental features
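# run a subset by marker, e.g.: pytest -m cpu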
pytest
rpyc
matplotlib
torch>=1.8
torchvision>=0.9
numpy
tqdm
psutil
tensorboardX
packaging
#!/usr/bin/env sh
main_file=$1
config_file=$2
python $main_file --local_rank $SLURM_PROCID --world_size $SLURM_NPROCS --host $HOST --port 29500 --config $config_file
# how to run this script
# example:
# HOST=IP_ADDR srun ./scripts/slurm_dist_train.sh ./examples/train_vit_2d.py ./configs/vit/vit_2d.py
import os
import subprocess
import sys
import warnings
import torch
from setuptools import setup, find_packages
from torch.utils.cpp_extension import BuildExtension, CUDAExtension, CUDA_HOME
# ninja build does not work unless include_dirs are abs path
this_dir = os.path.dirname(os.path.abspath(__file__))
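# parse the CUDA toolkit version from the output of `nvcc -V` (the "release X.Y" field)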
def get_cuda_bare_metal_version(cuda_dir):
raw_output = subprocess.check_output(
[cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
output = raw_output.split()
release_idx = output.index("release") + 1
release = output[release_idx].split(".")
bare_metal_major = release[0]
bare_metal_minor = release[1][0]
return raw_output, bare_metal_major, bare_metal_minor
if not torch.cuda.is_available():
# https://github.com/NVIDIA/apex/issues/486
# Extension builds after https://github.com/pytorch/pytorch/pull/23408 attempt to query torch.cuda.get_device_capability(),
# which will fail if you are compiling in an environment without visible GPUs (e.g. during an nvidia-docker build command).
print('\nWarning: Torch did not find available GPUs on this system.\n',
'If your intention is to cross-compile, this is not an error.\n'
'By default, Apex will cross-compile for Pascal (compute capabilities 6.0, 6.1, 6.2),\n'
'Volta (compute capability 7.0), Turing (compute capability 7.5),\n'
'and, if the CUDA version is >= 11.0, Ampere (compute capability 8.0).\n'
'If you wish to cross-compile for a single specific architecture,\n'
'export TORCH_CUDA_ARCH_LIST="compute capability" before running setup.py.\n')
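    # example: export TORCH_CUDA_ARCH_LIST="8.0" to build only for a single compute capability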
if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
_, bare_metal_major, _ = get_cuda_bare_metal_version(CUDA_HOME)
if int(bare_metal_major) == 11:
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5;8.0"
else:
os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])
if TORCH_MAJOR == 0 and TORCH_MINOR < 4:
raise RuntimeError("Apex requires Pytorch 0.4 or newer.\n" +
"The latest stable release can be obtained from https://pytorch.org/")
cmdclass = {}
ext_modules = []
extras = {}
if "--pyprof" in sys.argv:
string = "\n\nPyprof has been moved to its own dedicated repository and will " + \
"soon be removed from Apex. Please visit\n" + \
"https://github.com/NVIDIA/PyProf\n" + \
"for the latest version."
warnings.warn(string, DeprecationWarning)
with open('requirements.txt') as f:
required_packages = f.read().splitlines()
extras['pyprof'] = required_packages
try:
sys.argv.remove("--pyprof")
except:
pass
else:
warnings.warn(
"Option --pyprof not specified. Not installing PyProf dependencies!")
if "--cuda_ext" in sys.argv:
if TORCH_MAJOR == 0:
raise RuntimeError("--cuda_ext requires Pytorch 1.0 or later, "
"found torch.__version__ = {}".format(torch.__version__))
def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
raw_output, bare_metal_major, bare_metal_minor = get_cuda_bare_metal_version(
cuda_dir)
torch_binary_major = torch.version.cuda.split(".")[0]
torch_binary_minor = torch.version.cuda.split(".")[1]
print("\nCompiling cuda extensions with")
print(raw_output + "from " + cuda_dir + "/bin\n")
if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
"not match the version used to compile Pytorch binaries. " +
"Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
"In some cases, a minor-version mismatch will not cause later errors: " +
"https://github.com/NVIDIA/apex/pull/323#discussion_r287021798. "
"You can try commenting out this check (at your own risk).")
# Set up macros for forward/backward compatibility hack around
# https://github.com/pytorch/pytorch/commit/4404762d7dd955383acee92e6f06b48144a0742e
# and
# https://github.com/NVIDIA/apex/issues/456
# https://github.com/pytorch/pytorch/commit/eb7b39e02f7d75c26d8a795ea8c7fd911334da7e#diff-4632522f237f1e4e728cb824300403ac
version_ge_1_1 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 0):
version_ge_1_1 = ['-DVERSION_GE_1_1']
version_ge_1_3 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2):
version_ge_1_3 = ['-DVERSION_GE_1_3']
version_ge_1_5 = []
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 4):
version_ge_1_5 = ['-DVERSION_GE_1_5']
version_dependent_macros = version_ge_1_1 + version_ge_1_3 + version_ge_1_5
if "--cuda_ext" in sys.argv:
sys.argv.remove("--cuda_ext")
if CUDA_HOME is None:
raise RuntimeError(
"--cuda_ext was requested, but nvcc was not found. Are you sure your environment has nvcc available? If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, only images whose names contain 'devel' will provide nvcc.")
else:
check_cuda_torch_binary_vs_bare_metal(CUDA_HOME)
ext_modules.append(
CUDAExtension(name='colossal_C',
sources=['csrc/colossal_C_frontend.cpp',
'csrc/multi_tensor_sgd_kernel.cu',
'csrc/multi_tensor_scale_kernel.cu',
'csrc/multi_tensor_adam.cu',
'csrc/multi_tensor_l2norm_kernel.cu',
'csrc/multi_tensor_lamb.cu'],
extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
'nvcc': ['-lineinfo',
'-O3',
# '--resource-usage',
'--use_fast_math'] + version_dependent_macros}))
# Check, if ATen/CUDAGenerator.h is found, otherwise use the new ATen/CUDAGeneratorImpl.h, due to breaking change in https://github.com/pytorch/pytorch/pull/36026
generator_flag = []
torch_dir = torch.__path__[0]
if os.path.exists(os.path.join(torch_dir, 'include', 'ATen', 'CUDAGenerator.h')):
generator_flag = ['-DOLD_GENERATOR']
def fetch_requirements(path):
with open(path, 'r') as fd:
return [r.strip() for r in fd.readlines()]
install_requires = fetch_requirements('requirements/requirements.txt')
setup(
name='colossal-ai',
version='0.0.1-beta',
    packages=find_packages(exclude=('csrc',
                                    'tests',
                                    'docs',
                                    '*.egg-info',)),
description='An integrated large-scale model training framework with efficient parallelization techniques',
ext_modules=ext_modules,
cmdclass={'build_ext': BuildExtension} if ext_modules else {},
extras_require=extras,
install_requires=install_requires,
)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import os
from pathlib import Path
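# the dataset root is read from the DATA environment variable, e.g. export DATA=/path/to/datasets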
train_data = dict(
dataset=dict(
type='CIFAR10Dataset',
root=Path(os.environ['DATA']),
download=True,
transform_pipeline=[
dict(type='RandomResizedCrop', size=224),
dict(type='RandomHorizontalFlip'),
dict(type='ToTensor'),
dict(type='Normalize', mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
]
),
dataloader=dict(
batch_size=64,
pin_memory=True,
num_workers=4,
sampler=dict(
type='DataParallelSampler',
shuffle=True,
)
)
)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from pathlib import Path
import pytest
from colossalai.context.config import Config
@pytest.mark.cpu
def test_load_config():
filename = Path(__file__).parent.joinpath('sample_config.py')
config = Config.from_file(filename)
assert config.train_data, 'cannot access train data as attribute'
assert config.train_data.dataset, 'cannot access grandchild attribute'
    assert isinstance(config.train_data.dataset.transform_pipeline[0], dict), \
        f'expected elements of transform_pipeline to be dicts, but found {type(config.train_data.dataset.transform_pipeline[0])}'
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
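# 2-D tensor parallelism (tensor size 4, i.e. a 2 x 2 grid) combined with a 2-stage pipeline;
# with 16 processes this presumably leaves a data-parallel size of 2, as exercised by test_2d_init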
parallel = dict(
pipeline=dict(size=2),
tensor=dict(
size=4,
mode='2d'
)
)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
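# 2.5-D tensor parallelism: 8 GPUs split into depth=2 layers of 2 x 2 grids (assumed layout)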
parallel = dict(
pipeline=dict(size=2),
tensor=dict(
size=8,
depth=2,
mode='2.5d'
)
)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
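# 3-D tensor parallelism: 8 GPUs arranged as a 2 x 2 x 2 cube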
parallel = dict(
pipeline=dict(size=2),
tensor=dict(
size=8,
mode='3d'
)
)
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
from functools import partial
from pathlib import Path
import pytest
import torch.multiprocessing as mp
from colossalai import init_dist
from colossalai.context.parallel_mode import ParallelMode
from colossalai.core import global_context as gpc
CONFIG_PATH = Path(__file__).parent.joinpath('configs/parallel_2d_init.py').absolute()
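# expected layout for 16 processes with the 2-D parallel config (pipeline size 2, tensor size 4):
# data-parallel size 2 (outermost), 2 pipeline stages, and a 2 x 2 tensor grid of row/column groups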
def check_data_parallel_rank(rank):
if rank in [0, 1, 2, 3, 4, 5, 6, 7]:
assert gpc.get_local_rank(ParallelMode.DATA) == 0
elif rank in [8, 9, 10, 11, 12, 13, 14, 15]:
assert gpc.get_local_rank(ParallelMode.DATA) == 1
def check_pipeline_parallel_rank(rank):
if rank in [0, 1, 2, 3]:
assert gpc.get_local_rank(ParallelMode.PIPELINE) == 0
elif rank in [4, 5, 6, 7]:
assert gpc.get_local_rank(ParallelMode.PIPELINE) == 1
elif rank in [8, 9, 10, 11]:
assert gpc.get_local_rank(ParallelMode.PIPELINE) == 0
elif rank in [12, 13, 14, 15]:
assert gpc.get_local_rank(ParallelMode.PIPELINE) == 1
def check_tensor_parallel_rank(rank):
if rank in [0, 4, 8, 12]:
assert gpc.get_local_rank(ParallelMode.TENSOR) == 0
elif rank in [1, 5, 9, 13]:
assert gpc.get_local_rank(ParallelMode.TENSOR) == 1
elif rank in [2, 6, 10, 14]:
assert gpc.get_local_rank(ParallelMode.TENSOR) == 2
elif rank in [3, 7, 11, 15]:
assert gpc.get_local_rank(ParallelMode.TENSOR) == 3
def check_2d_parallel_rank(rank):
if rank in [0, 4, 8, 12]:
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_COL) == 0
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_ROW) == 0
elif rank in [1, 5, 9, 13]:
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_COL) == 0
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_ROW) == 1
elif rank in [2, 6, 10, 14]:
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_COL) == 1
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_ROW) == 0
elif rank in [3, 7, 11, 15]:
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_COL) == 1
assert gpc.get_local_rank(ParallelMode.PARALLEL_2D_ROW) == 1
def init_2d(local_rank, world_size, backend, port, host):
dist_args = dict(
config=CONFIG_PATH,
local_rank=local_rank,
world_size=world_size,
backend=backend,
port=port,
host=host
)
init_dist(**dist_args)
check_tensor_parallel_rank(local_rank)
check_data_parallel_rank(local_rank)
check_2d_parallel_rank(local_rank)
check_pipeline_parallel_rank(local_rank)
gpc.destroy()
@pytest.mark.cpu
def test_2d_init():
"""
As no computation or communication is done, we can run this test on CPU.
"""
world_size = 16
test_fn = partial(init_2d,
world_size=world_size,
backend='gloo',
port='29500',
host='localhost'
)
mp.spawn(test_fn, nprocs=world_size)
if __name__ == '__main__':
test_2d_init()