Unverified Commit ca4ae52d authored by Frank Lee's avatar Frank Lee Committed by GitHub
Browse files

Set examples as submodule (#162)

* remove examples folder

* added examples as submodule

* update .gitmodules
parent 17ce8569
......@@ -2,3 +2,7 @@
path = benchmark
url = https://github.com/hpcaitech/ColossalAI-Benchmark.git
branch = main
[submodule "examples"]
path = examples
url = https://github.com/FrankLeeeee/ColossalAI-Examples.git
branch = main
Subproject commit 217ac4600172ddbc020596587a0fe1af5e1287e8
# Train ResNet34 on CIFAR10
## Prepare Dataset
In the script, we used CIFAR10 dataset provided by the `torchvision` library. The code snippet is shown below:
```python
train_dataset = CIFAR10(
root=Path(os.environ['DATA']),
download=True,
transform=transforms.Compose(
[
transforms.RandomCrop(size=32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[
0.2023, 0.1994, 0.2010]),
]
)
)
```
Firstly, you need to specify where you want to store your CIFAR10 dataset by setting the environment variable `DATA`.
```bash
export DATA=/path/to/data
# example
# this will store the data in the current directory
export DATA=$PWD/data
```
The `torchvision` module will download the data automatically for you into the specified directory.
## Run training
We provide two examples of training ResNet34 on the CIFAR10 dataset. One example uses the engine and the other uses
the trainer. You can invoke the training scripts with the following commands. The batch size and learning rate
are set for a single GPU. Thus, in the following commands, `nproc_per_node` is 1, which means only one process
is launched. If you change `nproc_per_node`, you will have to adjust the learning rate accordingly because the global batch
size changes.
```bash
# with engine
python -m torch.distributed.launch --nproc_per_node 1 run_resnet_cifar10_with_engine.py
# with trainer
python -m torch.distributed.launch --nproc_per_node 1 run_resnet_cifar10_with_trainer.py
```
\ No newline at end of file
from colossalai.amp import AMP_TYPE
# Batch size handed to `get_dataloader` by the training scripts
# (they read it back as `gpc.config.BATCH_SIZE`).
BATCH_SIZE = 128

# Number of epochs; also used as `total_steps` of the cosine LR schedule.
NUM_EPOCHS = 200

# Mixed-precision settings.
# NOTE(review): the SimCLR configs in this commit define `fp16` at module
# level, whereas here it is nested under `CONFIG` -- confirm that the nested
# form is actually picked up by `colossalai.initialize`.
CONFIG = {
    'fp16': {
        'mode': AMP_TYPE.TORCH,
    },
}
from pathlib import Path
from colossalai.logging import get_dist_logger
import colossalai
import torch
import os
from colossalai.core import global_context as gpc
from colossalai.utils import get_dataloader
from torchvision import transforms
from colossalai.nn.lr_scheduler import CosineAnnealingLR
from torchvision.datasets import CIFAR10
from torchvision.models import resnet34
from tqdm import tqdm
def main():
    """Train ResNet34 on CIFAR10 with a hand-written Colossal-AI engine loop.

    The distributed environment is launched from `./config.py`, the dataset
    location is read from the `DATA` environment variable, and after every
    epoch the test split is evaluated and a summary line is logged on rank 0.
    """
    colossalai.launch_from_torch(config='./config.py')
    logger = get_dist_logger()

    # build resnet
    model = resnet34(num_classes=10)

    # build dataloaders
    data_root = Path(os.environ['DATA'])
    normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                     std=[0.2023, 0.1994, 0.2010])
    train_transform = transforms.Compose([
        transforms.RandomCrop(size=32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # The training split is created first because only it requests a download;
    # the test split then reads the files from the same root.
    train_dataset = CIFAR10(root=data_root, download=True, transform=train_transform)
    test_dataset = CIFAR10(root=data_root, train=False, transform=test_transform)

    batch_size = gpc.config.BATCH_SIZE
    train_dataloader = get_dataloader(dataset=train_dataset,
                                      shuffle=True,
                                      batch_size=batch_size,
                                      num_workers=1,
                                      pin_memory=True)
    test_dataloader = get_dataloader(dataset=test_dataset,
                                     add_sampler=False,
                                     batch_size=batch_size,
                                     num_workers=1,
                                     pin_memory=True)

    # build criterion, optimizer and lr_scheduler
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    lr_scheduler = CosineAnnealingLR(optimizer, total_steps=gpc.config.NUM_EPOCHS)

    engine, train_dataloader, test_dataloader, _ = colossalai.initialize(
        model, optimizer, criterion, train_dataloader, test_dataloader)

    for epoch in range(gpc.config.NUM_EPOCHS):
        # ---- training ----
        engine.train()
        # Only the global rank 0 process wraps the loader in a progress bar.
        batches = tqdm(train_dataloader) if gpc.get_global_rank() == 0 else train_dataloader
        for images, targets in batches:
            images = images.cuda()
            targets = targets.cuda()

            engine.zero_grad()
            output = engine(images)
            train_loss = engine.criterion(output, targets)
            engine.backward(train_loss)
            engine.step()
        # The schedule is stepped once per epoch, not once per batch.
        lr_scheduler.step()

        # ---- evaluation ----
        engine.eval()
        correct = 0
        total = 0
        for images, targets in test_dataloader:
            images = images.cuda()
            targets = targets.cuda()

            with torch.no_grad():
                output = engine(images)
                test_loss = engine.criterion(output, targets)
            pred = torch.argmax(output, dim=-1)
            correct += torch.sum(pred == targets)
            total += images.size(0)

        # The reported losses are those of the last batch seen in each phase.
        logger.info(
            f"Epoch {epoch} - train loss: {train_loss:.5}, test loss: {test_loss:.5}, acc: {correct / total:.5}, lr: {lr_scheduler.get_last_lr()[0]:.5g}", ranks=[0])


if __name__ == '__main__':
    main()
import os
from pathlib import Path
import colossalai
import torch
from colossalai.core import global_context as gpc
from colossalai.logging import get_dist_logger
from colossalai.nn import CosineAnnealingLR
from colossalai.nn.metric import Accuracy
from colossalai.trainer import Trainer, hooks
from colossalai.utils import MultiTimer, get_dataloader
from torchvision import transforms
from torchvision.datasets import CIFAR10
from torchvision.models import resnet34
from tqdm import tqdm
def main():
    """Train ResNet34 on CIFAR10 through the Colossal-AI `Trainer` API.

    The distributed environment is launched from `./config.py` and the dataset
    location is read from the `DATA` environment variable. Loss, accuracy,
    memory and timing reporting as well as LR scheduling are attached as
    trainer hooks.
    """
    colossalai.launch_from_torch(config='./config.py')
    logger = get_dist_logger()

    # build resnet
    model = resnet34(num_classes=10)

    # build dataloaders
    data_root = Path(os.environ['DATA'])
    normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                     std=[0.2023, 0.1994, 0.2010])
    train_transform = transforms.Compose([
        transforms.RandomCrop(size=32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    # The training split is created first because only it requests a download.
    train_dataset = CIFAR10(root=data_root, download=True, transform=train_transform)
    test_dataset = CIFAR10(root=data_root, train=False, transform=test_transform)

    batch_size = gpc.config.BATCH_SIZE
    train_dataloader = get_dataloader(dataset=train_dataset,
                                      shuffle=True,
                                      batch_size=batch_size,
                                      num_workers=1,
                                      pin_memory=True)
    test_dataloader = get_dataloader(dataset=test_dataset,
                                     add_sampler=False,
                                     batch_size=batch_size,
                                     num_workers=1,
                                     pin_memory=True)

    # build criterion, optimizer and lr_scheduler
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    lr_scheduler = CosineAnnealingLR(optimizer, total_steps=gpc.config.NUM_EPOCHS)

    engine, train_dataloader, test_dataloader, _ = colossalai.initialize(
        model, optimizer, criterion, train_dataloader, test_dataloader)

    # build a timer to measure time
    timer = MultiTimer()

    # create a trainer object
    trainer = Trainer(engine=engine, timer=timer, logger=logger)

    # define the hooks to attach to the trainer
    hook_list = [
        hooks.LossHook(),
        hooks.LRSchedulerHook(lr_scheduler=lr_scheduler, by_epoch=True),
        hooks.AccuracyHook(accuracy_func=Accuracy()),
        hooks.LogMetricByEpochHook(logger),
        hooks.LogMemoryByEpochHook(logger),
        hooks.LogTimingByEpochHook(timer, logger),

        # you can uncomment these lines if you wish to use them
        # hooks.TensorboardHook(log_dir='./tb_logs', ranks=[0]),
        # hooks.SaveCheckpointHook(checkpoint_dir='./ckpt')
    ]

    # start training; the test loader is evaluated every epoch (test_interval=1)
    trainer.fit(train_dataloader=train_dataloader,
                epochs=gpc.config.NUM_EPOCHS,
                test_dataloader=test_dataloader,
                test_interval=1,
                hooks=hook_list,
                display_progress=True)


if __name__ == '__main__':
    main()
import torch
import torch.nn as nn
import torch.nn.functional as F
from colossalai.registry import LOSSES
from torch.nn.modules.linear import Linear
@LOSSES.register_module
class NT_Xentloss(nn.Module):
    """Temperature-scaled cross-entropy loss over paired embeddings.

    Args:
        temperature: divisor applied to the similarity logits before the
            cross-entropy is taken.
    """

    def __init__(self, temperature=0.5):
        super().__init__()
        self.temperature = temperature

    def forward(self, z1, z2, label):
        """Compute the loss for two batches of embeddings.

        Args:
            z1: 2-D tensor of embeddings for the first view (rows are samples).
            z2: 2-D tensor of embeddings for the second view, same shape as `z1`.
            label: accepted for interface compatibility; not used by the loss.

        Returns:
            The summed cross-entropy divided by `2 * N`, where `N` is the
            number of rows of `z1`.
        """
        z1 = F.normalize(z1, dim=1)
        z2 = F.normalize(z2, dim=1)

        # Unpacking both dimensions keeps the requirement that z1 is 2-D.
        N, _ = z1.shape
        device = z1.device

        # Pairwise cosine similarity between all 2N embeddings.
        embeddings = torch.cat([z1, z2], dim=0)
        similarity_matrix = F.cosine_similarity(embeddings.unsqueeze(1), embeddings.unsqueeze(0), dim=-1)

        # Row i pairs with row i + N (and vice versa): the +/-N off-diagonals.
        upper_pairs = torch.diag(similarity_matrix, N)
        lower_pairs = torch.diag(similarity_matrix, -N)
        positives = torch.cat([upper_pairs, lower_pairs]).view(2 * N, 1)

        # Mask of entries that are NOT negatives: self-similarities and the
        # positive pairs, i.e. an N x N identity tiled over all four quadrants.
        excluded = torch.eye(N, dtype=torch.bool, device=device).repeat(2, 2)
        negatives = similarity_matrix[~excluded].view(2 * N, -1)

        # The positive sits in column 0 of every row, hence the all-zero targets.
        logits = torch.cat([positives, negatives], dim=1)
        logits /= self.temperature
        targets = torch.zeros(2 * N, device=device, dtype=torch.int64)

        summed = F.cross_entropy(logits, targets, reduction='sum')
        return summed / (2 * N)
if __name__ == '__main__':
    # Smoke test: push two random batches through a linear layer and print the
    # resulting loss. The label tensor is random; the loss does not read it.
    criterion = NT_Xentloss()
    net = Linear(256, 512)
    first_view = net(torch.randn(512, 256))
    second_view = net(torch.randn(512, 256))
    output = [first_view, second_view]
    label = [torch.randn(512)]
    loss = criterion(output[0], output[1], label[0])
    print(loss)
\ No newline at end of file
# Overview
Here is an example of applying [PreAct-ResNet18](https://arxiv.org/abs/1603.05027) to train [SimCLR](https://arxiv.org/abs/2002.05709) on CIFAR10.
SimCLR is a kind of self-supervised representation learning algorithm which learns generic representations of images on an unlabeled dataset. The generic representations are learned by simultaneously maximizing agreement between differently transformed views of the same image and minimizing agreement between transformed views of different images, following a method called contrastive learning. Updating the parameters of a neural network using this contrastive objective causes representations of corresponding views to “attract” each other, while representations of non-corresponding views “repel” each other. A more detailed description of SimCLR is available [here](https://ai.googleblog.com/2020/04/advancing-self-supervised-and-semi.html).
The training process consists of two phases: (1) self-supervised representation learning: the model, which acts as a feature extractor, is trained exactly as described above; and (2) linear evaluation: to evaluate how well the representations have been learned, a linear classifier is added on top of the feature extractor trained in phase 1. The linear classifier is trained with a labeled dataset in a conventional supervised manner, while the parameters of the feature extractor are kept fixed. This process is called linear evaluation.
# How to run
The training commands are specified in:
```shell
bash train.sh
```
Before running, you can specify the experiment name (folders with the same name will be created in `ckpt` to save checkpoints and in `tb_logs` to save the tensorboard file) and other training hyperparameters in `config.py`. By default CIFAR10 dataset will be downloaded automatically and saved in `./dataset`. Note that `LOG_NAME` in `le_config.py` should be the same as that in `config.py`.
Besides linear evaluation, you can also visualize the distribution of learned representations. A script is provided which first extracts representations and then visualizes them with [t-SNE](https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html). t-SNE is a good tool to visualize high-dimensional data. It converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data. You can run the script directly with the command below (remember to modify `log_name` and `epoch` to select which experiment folder and which training epoch the model is loaded from):
```shell
python visualization.py
```
# Results
The loss curve of SimCLR self-supervised training is as follows:
![SimCLR Loss Curve](./results/ssl_loss.png)
The loss curve of linear evaluation is as follows:
![Linear Evaluation Loss Curve](./results/linear_eval_loss.png)
The accuracy curve of linear evaluation is as follows:
![Linear Evaluation Accuracy](./results/linear_eval_acc.png)
The t-SNE of the training set of CIFAR10 is as follows:
![train tSNE](./results/train_tsne.png)
The t-SNE of the test set of CIFAR10 is as follows:
![test tSNE](./results/test_tsne.png)
\ No newline at end of file
from torchvision.transforms import transforms
class SimCLRTransform():
    """Produce two independently augmented views of the same input image."""

    def __init__(self):
        colour_jitter = transforms.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2)
        # 32 // 20 * 2 + 1 evaluates to 3, an odd kernel size.
        blur = transforms.GaussianBlur(kernel_size=32 // 20 * 2 + 1, sigma=(0.1, 2.0))
        pipeline = [
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply([colour_jitter], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([blur], p=0.5),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
        ]
        self.transform = transforms.Compose(pipeline)

    def __call__(self, x):
        """Apply the random pipeline twice to `x` and return both results."""
        first_view = self.transform(x)
        second_view = self.transform(x)
        return first_view, second_view
class LeTransform():
    """Single-view augmentation: random resized crop, flip, tensor conversion, normalisation."""

    def __init__(self):
        pipeline = [
            transforms.RandomResizedCrop(size=32, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
        ]
        self.transform = transforms.Compose(pipeline)

    def __call__(self, x):
        """Return the augmented version of `x`."""
        return self.transform(x)
\ No newline at end of file
from colossalai.amp import AMP_TYPE
# Experiment name.
LOG_NAME = 'cifar-simclr'

# Optimisation hyperparameters; the learning rate scales linearly with the
# batch size relative to a reference batch of 256.
BATCH_SIZE = 512
NUM_EPOCHS = 801
LEARNING_RATE = 0.03 * BATCH_SIZE / 256
WEIGHT_DECAY = 0.0005
MOMENTUM = 0.9

# Mixed-precision mode.
fp16 = {
    'mode': AMP_TYPE.TORCH,
}

# Dataset location.
dataset = {
    'root': './dataset',
}

gradient_accumulation = 2
clip_grad_norm = 1.0
from colossalai.amp import AMP_TYPE
# Experiment name.
LOG_NAME = 'cifar-simclr'

# NOTE(review): presumably the pretraining epoch whose checkpoint is loaded
# for linear evaluation -- confirm against the script that reads it.
EPOCH = 800

# Optimisation hyperparameters; the learning rate scales linearly with the
# batch size relative to a reference batch of 256.
BATCH_SIZE = 512
NUM_EPOCHS = 51
LEARNING_RATE = 0.03 * BATCH_SIZE / 256
WEIGHT_DECAY = 0.0005
MOMENTUM = 0.9

# Mixed-precision mode.
fp16 = {
    'mode': AMP_TYPE.TORCH,
}

# Dataset location.
dataset = {
    'root': './dataset',
}

gradient_accumulation = 1
clip_grad_norm = 1.0
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet34, resnet50, resnet101, resnet152
def backbone(model, **kwargs):
    """Build a ResNet feature extractor with its classification head removed.

    Args:
        model: one of 'resnet18', 'resnet34', 'resnet50', 'resnet101',
            'resnet152'. 'resnet18' selects the `ResNet` class defined in this
            file with `PreActBlock`; the other names select the torchvision
            constructors imported at the top of the file.
        **kwargs: forwarded unchanged to the selected constructor.

    Returns:
        The network with `fc` replaced by `torch.nn.Identity()` and an extra
        attribute `output_dim` holding the input width of the removed layer.

    Raises:
        AssertionError: if `model` is not one of the supported names.
    """
    assert model in ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152'], "current version only support resnet18 ~ resnet152"
    if model == 'resnet18':
        net = ResNet(PreActBlock, [2, 2, 2, 2], **kwargs)
    else:
        # Explicit dispatch table instead of eval() on the model name: no
        # string is ever evaluated as code, and an unknown name fails with a
        # KeyError even when assertions are stripped by `python -O`.
        torchvision_builders = {
            'resnet34': resnet34,
            'resnet50': resnet50,
            'resnet101': resnet101,
            'resnet152': resnet152,
        }
        net = torchvision_builders[model](**kwargs)

    # Remember the feature width before dropping the classifier.
    net.output_dim = net.fc.in_features
    net.fc = torch.nn.Identity()
    return net
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 `nn.Conv2d` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).
    """
    layer = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer
class BasicBlock(nn.Module):
    """Residual block of two 3x3 convolutions, each followed by batch norm.

    The shortcut is an empty `nn.Sequential` unless the stride or the channel
    count changes, in which case it is a 1x1 convolution plus batch norm.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride == 1 and in_planes == out_planes:
            # Shapes already match: pass the input through untouched.
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut, then a final ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)
class PreActBlock(nn.Module):
    """Pre-activation variant of `BasicBlock`: batch norm and ReLU come before each convolution.

    The shortcut is an empty `nn.Sequential` unless the stride or the channel
    count changes, in which case it is a single 1x1 convolution.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)

        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
            )

    def forward(self, x):
        """Run the block; the shortcut branch is fed the pre-activated input."""
        activated = F.relu(self.bn1(x))
        # Both branches start from the activated tensor, not from raw `x`.
        shortcut = self.shortcut(activated)
        out = self.conv1(activated)
        out = self.conv2(F.relu(self.bn2(out)))
        out += shortcut
        return out
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1, 3x3 and 1x1 convolutions, each with batch norm.

    The output has `expansion * planes` channels. The shortcut is an empty
    `nn.Sequential` unless the stride or the channel count changes, in which
    case it is a 1x1 convolution plus batch norm.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Apply the three conv-bn stages, add the shortcut, then a final ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)
class PreActBottleneck(nn.Module):
    """Pre-activation variant of `Bottleneck`: batch norm and ReLU come before each convolution.

    The output has `expansion * planes` channels. The shortcut is an empty
    `nn.Sequential` unless the stride or the channel count changes, in which
    case it is a single 1x1 convolution.
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        # Only the 3x3 convolution carries the stride.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)

        if stride == 1 and in_planes == out_planes:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
            )

    def forward(self, x):
        """Run the block; the shortcut branch is fed the pre-activated input."""
        activated = F.relu(self.bn1(x))
        # Both branches start from the activated tensor, not from raw `x`.
        shortcut = self.shortcut(activated)
        out = self.conv1(activated)
        out = self.conv2(F.relu(self.bn2(out)))
        out = self.conv3(F.relu(self.bn3(out)))
        out += shortcut
        return out
class ResNet(nn.Module):
    """ResNet with a 3x3 stem convolution, four residual stages and a linear head.

    Args:
        block: residual block class; must expose an `expansion` attribute and
            accept `(in_planes, planes, stride)`.
        num_blocks: sequence with the number of blocks in each of the four stages.
        num_classes: output width of the final linear layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Running channel count, advanced by `_make_layer` as stages are built.
        self.in_planes = 64

        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack `num_blocks` blocks; only the first one uses `stride`, the rest use 1."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def _stem(self, x):
        """Stem: conv1 -> bn1 -> ReLU."""
        return F.relu(self.bn1(self.conv1(x)))

    def forward(self, x, lin=0, lout=5):
        """Run a contiguous slice of the network.

        Stage k (0 = stem, 1..4 = layer1..layer4) runs when `lin < k + 1` and
        `lout > k - 1`. The pooling + flatten + `fc` head runs when `lout > 4`.
        With the defaults the whole network is executed.
        """
        out = x
        stages = (self._stem, self.layer1, self.layer2, self.layer3, self.layer4)
        for depth, stage in enumerate(stages):
            if lin < depth + 1 and lout > depth - 1:
                out = stage(out)

        if lout > 4:
            pooled = F.avg_pool2d(out, 4)
            flat = pooled.view(pooled.size(0), -1)
            out = self.fc(flat)
        return out
def debug():
    """Smoke-test the 'resnet18' backbone on a random batch and print the output size."""
    # 'resnet18' is built from the ResNet class in this file, whose constructor
    # only accepts (block, num_blocks, num_classes); forwarding `pretrained=True`
    # to it raised a TypeError, so no extra keyword is passed here.
    net = backbone('resnet18')
    x = torch.randn(4, 3, 32, 32)
    y = net(x)
    print(y.size())


if __name__ == '__main__':
    debug()
\ No newline at end of file
import torch
import torch.nn as nn
import torch.nn.functional as F
from .Backbone import backbone
class Linear_eval(nn.Module):
    """Backbone with gradients disabled, followed by a trainable linear classifier.

    Args:
        model: backbone name passed to `backbone`.
        class_num: number of output classes of the linear layer.
        **kwargs: forwarded to `backbone`.
    """

    def __init__(self, model='resnet18', class_num=10, **kwargs):
        super().__init__()
        feature_extractor = backbone(model, **kwargs)
        # Turn off gradients for every backbone parameter; only `fc` is trained.
        feature_extractor.requires_grad_(False)
        self.backbone = feature_extractor
        self.fc = nn.Linear(feature_extractor.output_dim, class_num)

    def forward(self, x):
        """Return the classifier output for the backbone features of `x`."""
        features = self.backbone(x)
        return self.fc(features)
import torch
import torch.nn as nn
import torch.nn.functional as F
from .Backbone import backbone
class projection_MLP(nn.Module):
    """Two-layer MLP head: Linear -> ReLU -> Linear.

    Args:
        in_dim: input width; also used as the hidden width.
        out_dim: output width (default 256).
    """

    def __init__(self, in_dim, out_dim=256):
        super().__init__()
        # The hidden layer keeps the input width.
        self.layer1 = nn.Sequential(
            nn.Linear(in_dim, in_dim),
            nn.ReLU(inplace=True),
        )
        self.layer2 = nn.Linear(in_dim, out_dim)

    def forward(self, x):
        """Return the projection of `x`."""
        return self.layer2(self.layer1(x))
class SimCLR(nn.Module):
    """Backbone plus projection head, applied to two views of a batch.

    Args:
        model: backbone name passed to `backbone`.
        **kwargs: forwarded to `backbone`.
    """

    def __init__(self, model='resnet18', **kwargs):
        super().__init__()
        self.backbone = backbone(model, **kwargs)
        self.projector = projection_MLP(self.backbone.output_dim)
        # `encoder` chains the two modules above; it holds the same module
        # objects, not copies.
        self.encoder = nn.Sequential(self.backbone, self.projector)

    def forward(self, x1, x2):
        """Encode both views with the shared encoder and return the pair."""
        first = self.encoder(x1)
        second = self.encoder(x2)
        return first, second
\ No newline at end of file
from colossalai.trainer.hooks import BaseHook
from colossalai.core import global_context as gpc
from colossalai.context import ParallelMode
from colossalai.logging import get_dist_logger
class TotalBatchsizeHook(BaseHook):
    """Trainer hook that logs the total batch size once before training.

    Args:
        priority: hook priority handed to `BaseHook` (default 2).
    """

    def __init__(self, priority: int = 2) -> None:
        super().__init__(priority)
        self.logger = get_dist_logger()

    def before_train(self, trainer):
        """Log `BATCH_SIZE * gradient_accumulation * data-parallel world size` on rank 0."""
        per_step = gpc.config.BATCH_SIZE
        accumulation = gpc.config.gradient_accumulation
        replicas = gpc.get_world_size(ParallelMode.DATA)
        total_batch_size = per_step * accumulation * replicas
        self.logger.info(f'Total batch size = {total_batch_size}', ranks=[0])
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment